Błąd podczas rysowania wykresu DataFrame zawierającej NaN w Pandas 0.12.0 i Matplotlib 1.3.1 na Pythonie 3.3.2
Po pierwsze, to pytanie nie jest takie samo jak tamto.
Mam problem z tym, że kiedy próbuję wykreślić ramkę DataFrame zawierającą numeryczną wartość NaN w jednej komórce, pojawia się błąd:
C:\>\Python33x86\python.exe
Python 3.3.2 (v3.3.2:d047928ae3f6, May 16 2013, 00:03:43) [MSC v.1600 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas as pd
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>>
>>> dates = pd.date_range('20131201', periods=5, freq='H')
>>> data = [[1, 2], [4, 5], [9, np.nan], [16, 17], [25, 26]]
>>> df = pd.DataFrame(data, index=dates,
... columns=list('AB'))
>>>
>>> print(df.to_string())
A B
2013-12-01 00:00:00 1 2
2013-12-01 01:00:00 4 5
2013-12-01 02:00:00 9 NaN
2013-12-01 03:00:00 16 17
2013-12-01 04:00:00 25 26
>>> df.plot()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 1636, in plot_frame
plot_obj.generate()
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 856, in generate
self._make_plot()
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 1240, in _make_plot
self._make_ts_plot(data, **self.kwds)
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 1321, in _make_ts_plot
_plot(data[col], i, ax, label, style, **kwds)
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 1295, in _plot
style=style, **kwds)
File "C:\Python33x86\lib\site-packages\pandas\tseries\plotting.py", line 77, in tsplot
lines = plotf(ax, *args, **kwargs)
File "C:\Python33x86\lib\site-packages\matplotlib\axes.py", line 4139, in plot
for line in self._get_lines(*args, **kwargs):
File "C:\Python33x86\lib\site-packages\matplotlib\axes.py", line 319, in _grab_next_args
for seg in self._plot_args(remaining, kwargs):
File "C:\Python33x86\lib\site-packages\matplotlib\axes.py", line 297, in _plot_args
x, y = self._xy_from_xy(x, y)
File "C:\Python33x86\lib\site-packages\matplotlib\axes.py", line 216, in _xy_from_xy
by = self.axes.yaxis.update_units(y)
File "C:\Python33x86\lib\site-packages\matplotlib\axis.py", line 1337, in update_units
converter = munits.registry.get_converter(data)
File "C:\Python33x86\lib\site-packages\matplotlib\units.py", line 137, in get_converter
xravel = x.ravel()
File "C:\Python33x86\lib\site-packages\numpy\ma\core.py", line 3969, in ravel
r._mask = ndarray.ravel(self._mask).reshape(r.shape)
File "C:\Python33x86\lib\site-packages\pandas\core\series.py", line 981, in reshape
return ndarray.reshape(self, newshape, order)
TypeError: an integer is required
Powyższy kod działa, jeśli zastąpię np.NaN liczbą, taką jak „2.3”.
Wykreślanie danych jako dwóch oddzielnych serii również nie działa (kończy się niepowodzeniem przy dodawaniu do wykresu serii zawierającej NaN):
C:\>\Python33x86\python.exe
Python 3.3.2 (v3.3.2:d047928ae3f6, May 16 2013, 00:03:43) [MSC v.1600 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas as pd
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>>
>>> dates = pd.date_range('20131201', periods=5, freq='H')
>>> data = [[1, 2], [4, 5], [9, np.nan], [16, 17], [25, 26]]
>>> df = pd.DataFrame(data, index=dates,
... columns=list('AB'))
>>>
>>> print(df.to_string())
A B
2013-12-01 00:00:00 1 2
2013-12-01 01:00:00 4 5
2013-12-01 02:00:00 9 NaN
2013-12-01 03:00:00 16 17
2013-12-01 04:00:00 25 26
>>> df['A'].plot(label='This is A', style='k')
<matplotlib.axes.AxesSubplot object at 0x02ACFF90>
>>> df['B'].plot(label='This is B', style='g')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 1730, in plot_series
plot_obj.generate()
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 856, in generate
self._make_plot()
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 1240, in _make_plot
self._make_ts_plot(data, **self.kwds)
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 1311, in _make_ts_plot
_plot(data, 0, ax, label, self.style, **kwds)
File "C:\Python33x86\lib\site-packages\pandas\tools\plotting.py", line 1295, in _plot
style=style, **kwds)
File "C:\Python33x86\lib\site-packages\pandas\tseries\plotting.py", line 77, in tsplot
lines = plotf(ax, *args, **kwargs)
File "C:\Python33x86\lib\site-packages\matplotlib\axes.py", line 4139, in plot
for line in self._get_lines(*args, **kwargs):
File "C:\Python33x86\lib\site-packages\matplotlib\axes.py", line 319, in _grab_next_args
for seg in self._plot_args(remaining, kwargs):
File "C:\Python33x86\lib\site-packages\matplotlib\axes.py", line 297, in _plot_args
x, y = self._xy_from_xy(x, y)
File "C:\Python33x86\lib\site-packages\matplotlib\axes.py", line 216, in _xy_from_xy
by = self.axes.yaxis.update_units(y)
File "C:\Python33x86\lib\site-packages\matplotlib\axis.py", line 1337, in update_units
converter = munits.registry.get_converter(data)
File "C:\Python33x86\lib\site-packages\matplotlib\units.py", line 137, in get_converter
xravel = x.ravel()
File "C:\Python33x86\lib\site-packages\numpy\ma\core.py", line 3969, in ravel
r._mask = ndarray.ravel(self._mask).reshape(r.shape)
File "C:\Python33x86\lib\site-packages\pandas\core\series.py", line 981, in reshape
return ndarray.reshape(self, newshape, order)
TypeError: an integer is required
Jednakże, jeśli zrobię to bezpośrednio za pomocą funkcji plot() z modułu pyplot Matplotliba, zamiast używać metody plot() Pandas, to działa:
C:\>\Python33x86\python.exe
Python 3.3.2 (v3.3.2:d047928ae3f6, May 16 2013, 00:03:43) [MSC v.1600 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas as pd
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> dates = pd.date_range('20131201', periods=5, freq='H')
>>> plt.plot(dates, [1, 4, 9, 16, 25], 'k', dates, [2, 5, np.NAN, 17, 26], 'g')
[<matplotlib.lines.Line2D object at 0x03E98650>, <matplotlib.lines.Line2D object at 0x040929B0>]
>>> plt.show()
>>>
Wydaje się więc, że mam obejście problemu, ale ponieważ wykreślam duże ramki DataFrame, wolałbym użyć metody plot() Pandas, która jest wygodniejsza. Próbowałem prześledzić ślad stosu, ale po pewnym czasie robi się to skomplikowane (nie znam kodu źródłowego Pandas, Numpy i Matplotlib). Czy robię coś źle, czy jest to możliwy błąd w metodzie plot() Pandas?
Dziękuję za pomoc!
Próbowałem zarówno na Windows x86, jak i Linux AMD64 z tymi samymi wynikami w tych wersjach:
Python 3.3.2, Pandas 0.12.0, Matplotlib 1.3.1, Numpy 1.7.1