From a8d3b73c258e6e1f7e258f2c1dca32f6505a2696 Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Tue, 27 Oct 2015 14:40:56 -0400 Subject: [PATCH 01/10] BUG: fix Panel.fillna() ignoring axis parameter --- pandas/core/common.py | 29 +++++++++++ pandas/core/generic.py | 33 ++++-------- pandas/core/internals.py | 13 ++--- pandas/tests/test_panel.py | 72 +++++++++++++++++++++++++- pandas/tests/test_panel4d.py | 97 +++++++++++++++++++++++++++++++++++- 5 files changed, 212 insertions(+), 32 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index d6aa6e6bb90cc..f2549298cfbee 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1764,6 +1764,35 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dt return values +def interpolate_nd(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): + + ndim = values.ndim + shape = values.shape + + if ndim < 3: + raise AssertionError("This function should only be used on arrays of " + "dimensionality 3 or higher") + func = interpolate_2d if ndim == 3 else interpolate_nd + method = partial(func, method=method, limit=limit, fill_value=fill_value, dtype=dtype) + + if ndim == 3: + if axis == 0: + for n in range(shape[1]): + values[:,n] = method(values[:,n], axis=1) + else: + for n in range(shape[0]): + values[n] = method(values[n], axis=(1 if axis == 1 else 0)) + else: + if axis == 0: + for n in range(shape[1]): + values[:,n] = method(values[:,n], axis=0) + else: + for n in range(shape[0]): + values[n] = method(values[n], axis=axis-1) + + return values + + def _consensus_name_attr(objs): name = objs[0].name for obj in objs[1:]: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c18f4ec0a1f47..45c3c8edaf765 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2773,39 +2773,24 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, # set the default here, so functions examining the signaure # can detect if something 
was set (e.g. in groupby) (GH9221) if axis is None: - axis = 0 + axis = self._stat_axis_name axis = self._get_axis_number(axis) method = com._clean_fill_method(method) - from pandas import DataFrame if value is None: if method is None: raise ValueError('must specify a fill method or value') - if self._is_mixed_type and axis == 1: - if inplace: + if self._is_mixed_type: + if ((self.ndim > 2) and (axis == 0)) or inplace: raise NotImplementedError() - result = self.T.fillna(method=method, limit=limit).T - - # need to downcast here because of all of the transposes - result._data = result._data.downcast() - - return result - - # > 3d - if self.ndim > 3: - raise NotImplementedError( - 'Cannot fillna with a method for > 3dims' - ) + elif (self.ndim == 2) and (axis == 1): + result = self.T.fillna(method=method, limit=limit).T - # 3d - elif self.ndim == 3: + # need to downcast here because of all of the transposes + result._data = result._data.downcast() - # fill in 2d chunks - result = dict([(col, s.fillna(method=method, value=value)) - for col, s in compat.iteritems(self)]) - return self._constructor.from_dict(result).__finalize__(self) + return result - # 2d or less method = com._clean_fill_method(method) new_data = self._data.interpolate(method=method, axis=axis, @@ -2813,7 +2798,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, coerce=True, downcast=downcast) - else: + else: if method is not None: raise ValueError('cannot specify both a fill method and value') diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b3e7e82b5feb7..75f6403523298 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -909,12 +909,13 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = self.values if inplace else self.values.copy() values, _, fill_value, _ = self._try_coerce_args(values, fill_value) values = self._try_operate(values) - values = com.interpolate_2d(values, - method=method, - 
axis=axis, - limit=limit, - fill_value=fill_value, - dtype=self.dtype) + interp_func = com.interpolate_nd if values.ndim > 2 else com.interpolate_2d + values = interp_func(values, + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + dtype=self.dtype) values = self._try_coerce_result(values) blocks = [self.make_block(values, diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1f8bcf8c9879f..ecf05382a501c 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1454,20 +1454,90 @@ def test_fillna(self): assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) + # Fill forward. + filled = self.panel.fillna(method='ffill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel.fillna(method='backfill', limit=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['ItemA'], + rounded['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 1. + filled = self.panel.fillna(method='backfill', axis=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 2, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel.fillna(method='backfill', axis=2) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. 
empty = self.panel.reindex(items=[]) filled = empty.fillna(0) assert_panel_equal(filled, empty) + # either method or value must be specified self.assertRaises(ValueError, self.panel.fillna) + # method and value can not both be specified self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') + # can't pass list or tuple, only scalar self.assertRaises(TypeError, self.panel.fillna, [1, 2]) self.assertRaises(TypeError, self.panel.fillna, (1, 2)) # limit not implemented when only value is specified p = Panel(np.random.randn(3,4,5)) p.iloc[0:2,0:2,0:2] = np.nan - self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 8395 + + # Forward fill along axis 0, interpolating values across DataFrames. + filled = self.panel.fillna(method='ffill', axis=0) + nan_indexes = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + + # Values from ItemA are filled into ItemB. + assert_series_equal(filled['ItemB']['C'][nan_indexes], + self.panel['ItemA']['C'][nan_indexes]) + + # Backfill along axis 0. + filled = self.panel.fillna(method='backfill', axis=0) + + # The test data lacks values that can be backfilled on axis 0. + assert_panel_equal(filled, self.panel) + + # Reverse the panel and backfill along axis 0, to properly test + # backfill. + reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0])) + filled = reverse_panel.fillna(method='bfill', axis=0) + nan_indexes = reverse_panel['ItemB']['C'].index[ + reverse_panel['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['ItemB']['C'][nan_indexes], + reverse_panel['ItemA']['C'][nan_indexes]) + + # Fill along axis 0 with limit. 
+ filled = self.panel.fillna(method='ffill', axis=0, limit=1) + a_nan = self.panel['ItemA']['C'].index[ + self.panel['ItemA']['C'].apply(np.isnan)] + b_nan = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemA remain unfilled in + # ItemC. + self.assertTrue( + filled['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all()) def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 3772d4b9c272b..dffb0ccc6effe 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -909,11 +909,106 @@ def test_sort_index(self): # assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): + # GH 8395 self.assertFalse(np.isfinite(self.panel4d.values).all()) filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - self.assertRaises(NotImplementedError, self.panel4d.fillna, method='pad') + filled = self.panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill')) + + panel4d = self.panel4d.copy() + panel4d['str'] = 'foo' + + filled = panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + panel4d['l1']['ItemA'].fillna(method='backfill')) + + # Fill forward. + filled = self.panel4d.fillna(method='ffill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel4d.fillna(method='backfill', limit=1) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel4d.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['l1']['ItemA'], + rounded['l1']['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 2. 
+ filled = self.panel4d.fillna(method='backfill', axis=2) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 3, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel4d.fillna(method='backfill', axis=3) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. + empty = self.panel4d.reindex(items=[]) + filled = empty.fillna(0) + assert_panel4d_equal(filled, empty) + + # either method or value must be specified + self.assertRaises(ValueError, self.panel4d.fillna) + # method and value can not both be specified + self.assertRaises(ValueError, self.panel4d.fillna, 5, method='ffill') + + # can't pass list or tuple, only scalar + self.assertRaises(TypeError, self.panel4d.fillna, [1, 2]) + self.assertRaises(TypeError, self.panel4d.fillna, (1, 2)) + + # limit not implemented when only value is specified + p = Panel4D(np.random.randn(3,4,5,6)) + p.iloc[0:2,0:2,0:2,0:2] = np.nan + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 8395 + + # Back fill along axis 0, interpolating values across Panels + filled = self.panel4d.fillna(method='bfill', axis=0) + nan_indexes = self.panel4d['l1']['ItemB']['C'].index[ + self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] + + # Values from ItemC are filled into ItemB. + assert_series_equal(filled['l1']['ItemB']['C'][nan_indexes], + self.panel4d['l1']['ItemC']['C'][nan_indexes]) + + # Forward fill along axis 0. + filled = self.panel4d.fillna(method='ffill', axis=0) + + # The test data lacks values that can be forward filled on axis 0. + assert_panel4d_equal(filled, self.panel4d) + + # Reverse the panel and forward fill along axis 0, to properly test + # forward fill. 
+ reverse_panel = self.panel4d.reindex_axis(reversed(self.panel4d.axes[0])) + filled = reverse_panel.fillna(method='ffill', axis=0) + nan_indexes = reverse_panel['l3']['ItemB']['C'].index[ + reverse_panel['l3']['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['l3']['ItemB']['C'][nan_indexes], + reverse_panel['l1']['ItemB']['C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel4d.fillna(method='bfill', axis=0, limit=1) + c_nan = self.panel4d['l1']['ItemC']['C'].index[ + self.panel4d['l1']['ItemC']['C'].apply(np.isnan)] + b_nan = self.panel4d['l1']['ItemB']['C'].index[ + self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemC remain unfilled in + # ItemA. + self.assertTrue( + filled['l1']['ItemA']['C'][b_nan.diff(c_nan)].apply(np.isnan).all()) def test_swapaxes(self): result = self.panel4d.swapaxes('labels', 'items') From c81fbb2e80b2fb06da5e02b92048f9144c28edf6 Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Tue, 27 Oct 2015 14:46:09 -0400 Subject: [PATCH 02/10] added line to whatsnew --- doc/source/whatsnew/v0.17.1.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index e303adfd356da..228a04a0fc3c5 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -116,3 +116,5 @@ Bug Fixes - Bug in ``to_excel`` with openpyxl 2.2+ and merging (:issue:`11408`) - Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`) + +- Bug in ``Panel.fillna()`` does not fill across axis 0 From 8fc35743fa3e9e70e55246dab1577393e0e5f6d4 Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Tue, 27 Oct 2015 14:56:47 -0400 Subject: [PATCH 03/10] fix indentation error --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 
45c3c8edaf765..b1f5c84030f8d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2798,7 +2798,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, coerce=True, downcast=downcast) - else: + else: if method is not None: raise ValueError('cannot specify both a fill method and value') From 651f65c9153aaee1d7f60eab7e8ccb1b60552e33 Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Tue, 27 Oct 2015 15:21:22 -0400 Subject: [PATCH 04/10] restored dropped line --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b1f5c84030f8d..cad7a61df0952 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2777,6 +2777,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, axis = self._get_axis_number(axis) method = com._clean_fill_method(method) + from pandas import DataFrame if value is None: if method is None: raise ValueError('must specify a fill method or value') From aed6edbd855a6a41e89fd4f1a3ad193e8cd15b48 Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Thu, 29 Oct 2015 17:08:38 -0400 Subject: [PATCH 05/10] rename interpolate_2d to fill_2d --- pandas/core/categorical.py | 4 ++-- pandas/core/internals.py | 6 +++--- pandas/core/missing.py | 31 ++++++++++++++++++++++++++++++- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index e304684036766..d5bc9a99270e5 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,7 +10,7 @@ from pandas.core.algorithms import factorize from pandas.core.base import PandasObject, PandasDelegate import pandas.core.common as com -from pandas.core.missing import interpolate_2d +from pandas.core.missing import fill_2d from pandas.util.decorators import cache_readonly, deprecate_kwarg from pandas.core.common import (ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex, @@ -1313,7 +1313,7 
@@ def fillna(self, value=None, method=None, limit=None): if method is not None: values = self.to_dense().reshape(-1, len(self)) - values = interpolate_2d( + values = fill_2d( values, method, 0, None, value).astype(self.categories.dtype)[0] values = _get_codes_for_values(values, self.categories) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 5738525102cee..0ab970a6947f5 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -910,7 +910,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = self.values if inplace else self.values.copy() values, _, fill_value, _ = self._try_coerce_args(values, fill_value) values = self._try_operate(values) - interp_func = mis.interpolate_nd if values.ndim > 2 else mis.interpolate_2d + interp_func = mis.fill_nd if values.ndim > 2 else mis.fill_2d values = interp_func(values, method=method, axis=axis, @@ -2359,7 +2359,7 @@ def make_block_same_class(self, values, placement, def interpolate(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, **kwargs): - values = mis.interpolate_2d( + values = mis.fill_2d( self.values.to_dense(), method, axis, limit, fill_value) return self.make_block_same_class(values=values, placement=self.mgr_locs) @@ -3775,7 +3775,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None, # fill if needed if method is not None or limit is not None: - new_values = mis.interpolate_2d(new_values, method=method, + new_values = mis.fill_2d(new_values, method=method, limit=limit, fill_value=fill_value) if self._block.is_sparse: diff --git a/pandas/core/missing.py b/pandas/core/missing.py index f1143ad808b91..c68227406d52b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -219,7 +219,7 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, return new_y -def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): +def fill_2d(values, method='pad', 
axis=0, limit=None, fill_value=None, dtype=None): """ perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result """ @@ -252,6 +252,35 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dt return values +def fill_nd(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): + + ndim = values.ndim + shape = values.shape + + if ndim < 3: + raise AssertionError("This function should only be used on arrays of " + "dimensionality 3 or higher") + func = fill_2d if ndim == 3 else fill_nd + method = partial(func, method=method, limit=limit, fill_value=fill_value, dtype=dtype) + + if ndim == 3: + if axis == 0: + for n in range(shape[1]): + values[:,n] = method(values[:,n], axis=1) + else: + for n in range(shape[0]): + values[n] = method(values[n], axis=(1 if axis == 1 else 0)) + else: + if axis == 0: + for n in range(shape[1]): + values[:,n] = method(values[:,n], axis=0) + else: + for n in range(shape[0]): + values[n] = method(values[n], axis=axis-1) + + return values + + def _interp_wrapper(f, wrap_dtype, na_override=None): def wrapper(arr, mask, limit=None): view = arr.view(wrap_dtype) From b83afacd0012bc2d4c55687a3e6a1177cd135bdd Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Fri, 30 Oct 2015 16:38:19 -0400 Subject: [PATCH 06/10] rename interpolate functions in missing.py --- pandas/core/categorical.py | 5 ++- pandas/core/generic.py | 2 +- pandas/core/internals.py | 19 ++++++----- pandas/core/missing.py | 66 +++++++++++++++++--------------------- 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d5bc9a99270e5..c6b33557951d5 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,7 +10,7 @@ from pandas.core.algorithms import factorize from pandas.core.base import PandasObject, PandasDelegate import pandas.core.common as com -from pandas.core.missing import fill_2d +from 
pandas.core.missing import pad from pandas.util.decorators import cache_readonly, deprecate_kwarg from pandas.core.common import (ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex, @@ -1313,8 +1313,7 @@ def fillna(self, value=None, method=None, limit=None): if method is not None: values = self.to_dense().reshape(-1, len(self)) - values = fill_2d( - values, method, 0, None, value).astype(self.categories.dtype)[0] + values = pad(values, method, 0, None, value).astype(self.categories.dtype)[0] values = _get_codes_for_values(values, self.categories) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 59606a66970d5..1179abcffe572 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2793,7 +2793,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, return result - method = com._clean_fill_method(method) + method = mis._clean_fill_method(method) new_data = self._data.interpolate(method=method, axis=axis, limit=limit, diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0ab970a6947f5..972992067b4f9 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -910,13 +910,12 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = self.values if inplace else self.values.copy() values, _, fill_value, _ = self._try_coerce_args(values, fill_value) values = self._try_operate(values) - interp_func = mis.fill_nd if values.ndim > 2 else mis.fill_2d - values = interp_func(values, - method=method, - axis=axis, - limit=limit, - fill_value=fill_value, - dtype=self.dtype) + values = mis.pad(values, + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + dtype=self.dtype) values = self._try_coerce_result(values) blocks = [self.make_block(values, @@ -951,8 +950,8 @@ def func(x): # process a 1-d slice, returning it # should the axis argument be handled below in apply_along_axis? - # i.e. 
not an arg to mis.interpolate_1d - return mis.interpolate_1d(index, x, method=method, limit=limit, + # i.e. not an arg to mis.interpolate + return mis.interpolate(index, x, method=method, limit=limit, limit_direction=limit_direction, fill_value=fill_value, bounds_error=False, **kwargs) @@ -2359,7 +2358,7 @@ def make_block_same_class(self, values, placement, def interpolate(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, **kwargs): - values = mis.fill_2d( + values = mis.pad( self.values.to_dense(), method, axis, limit, fill_value) return self.make_block_same_class(values=values, placement=self.mgr_locs) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index c68227406d52b..bc66b6542bcbf 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -49,9 +49,9 @@ def _clean_interp_method(method, **kwargs): return method -def interpolate_1d(xvalues, yvalues, method='linear', limit=None, - limit_direction='forward', - fill_value=None, bounds_error=False, order=None, **kwargs): +def interpolate(xvalues, yvalues, method='linear', limit=None, + limit_direction='forward', + fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. @@ -219,15 +219,38 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, return new_y -def fill_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): - """ perform an actual interpolation of values, values will be make 2-d if - needed fills inplace, returns the result +def pad(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): + """ + Perform an actual interpolation of values. 1-d values will be made 2-d temporarily. 
+ Returns the result """ + ndim = values.ndim + shape = values.shape + + func = partial(pad, method=method, limit=limit, fill_value=fill_value, dtype=dtype) + + if ndim > 2: + if ndim == 3: + if axis == 0: + for n in range(shape[1]): + values[:,n] = func(values[:,n], axis=1) + else: + for n in range(shape[0]): + values[n] = func(values[n], axis=(1 if axis == 1 else 0)) + else: + if axis == 0: + for n in range(shape[1]): + values[:,n] = func(values[:,n], axis=0) + else: + for n in range(shape[0]): + values[n] = func(values[n], axis=axis-1) + + return values + transf = (lambda x: x) if axis == 0 else (lambda x: x.T) # reshape a 1 dim if needed - ndim = values.ndim if values.ndim == 1: if axis != 0: # pragma: no cover raise AssertionError("cannot interpolate on a ndim == 1 with " @@ -252,35 +275,6 @@ def fill_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=Non return values -def fill_nd(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): - - ndim = values.ndim - shape = values.shape - - if ndim < 3: - raise AssertionError("This function should only be used on arrays of " - "dimensionality 3 or higher") - func = fill_2d if ndim == 3 else fill_nd - method = partial(func, method=method, limit=limit, fill_value=fill_value, dtype=dtype) - - if ndim == 3: - if axis == 0: - for n in range(shape[1]): - values[:,n] = method(values[:,n], axis=1) - else: - for n in range(shape[0]): - values[n] = method(values[n], axis=(1 if axis == 1 else 0)) - else: - if axis == 0: - for n in range(shape[1]): - values[:,n] = method(values[:,n], axis=0) - else: - for n in range(shape[0]): - values[n] = method(values[n], axis=axis-1) - - return values - - def _interp_wrapper(f, wrap_dtype, na_override=None): def wrapper(arr, mask, limit=None): view = arr.view(wrap_dtype) From 5bffc9e4d1fb43ecd06286fcbb78259e6d2e467d Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Fri, 30 Oct 2015 17:17:19 -0400 Subject: [PATCH 07/10] added test for filling across dtypes 
with axis=0 --- pandas/tests/test_panel.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index ecf05382a501c..0c092f89c4090 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1539,6 +1539,11 @@ def test_fillna_axis_0(self): self.assertTrue( filled['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all()) + # filling across axis 0 is not implemented for mixed dtypes + panel = self.panel.copy() + panel['str'] = 'foo' + self.assertRaises(NotImplementedError, lambda : panel.fillna(method='ffill', axis=0)) + def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), self.panel.fillna(method='ffill')) From 493a7333c1d2ecdfbc9790941ed860d93dda703e Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Fri, 30 Oct 2015 17:36:52 -0400 Subject: [PATCH 08/10] missed one --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 972992067b4f9..08048d684e407 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3774,7 +3774,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None, # fill if needed if method is not None or limit is not None: - new_values = mis.fill_2d(new_values, method=method, + new_values = mis.pad(new_values, method=method, limit=limit, fill_value=fill_value) if self._block.is_sparse: From 77af392605a8120f9fd4982128de77bfe6875163 Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Fri, 30 Oct 2015 18:07:39 -0400 Subject: [PATCH 09/10] added more detail to whatsnew; more descriptive NotImplemented errors --- doc/source/whatsnew/v0.17.1.txt | 4 +++- pandas/core/common.py | 1 - pandas/core/generic.py | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 228a04a0fc3c5..60b5e85a2bbae 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ 
b/doc/source/whatsnew/v0.17.1.txt @@ -117,4 +117,6 @@ Bug Fixes - Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`) -- Bug in ``Panel.fillna()`` does not fill across axis 0 +- Bug in ``Panel.fillna()`` does not fill across axis 0 (:issue:`8251`) + +- Bug in ``Panel.fillna()`` loses index names (:issue:`3570`) diff --git a/pandas/core/common.py b/pandas/core/common.py index 6874a074fcb74..4490aaf58a002 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1420,7 +1420,6 @@ def _fill_zeros(result, x, y, name, fill): return result - def _consensus_name_attr(objs): name = objs[0].name for obj in objs[1:]: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1179abcffe572..34e9047a9fdd2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2783,8 +2783,10 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if method is None: raise ValueError('must specify a fill method or value') if self._is_mixed_type: - if ((self.ndim > 2) and (axis == 0)) or inplace: - raise NotImplementedError() + if (self.ndim > 2) and (axis == 0): + raise NotImplementedError('cannot fill across axis 0 for mixed dtypes') + if inplace: + raise NotImplementedError('cannot fill inplace for mixed dtypes') elif (self.ndim == 2) and (axis == 1): result = self.T.fillna(method=method, limit=limit).T From b3eae7550dba969f5ba14754c79df7d519537411 Mon Sep 17 00:00:00 2001 From: Nicholas Stahl Date: Sat, 31 Oct 2015 18:05:00 -0400 Subject: [PATCH 10/10] ENH: support Akima 1D interpolation --- doc/source/whatsnew/v0.17.1.txt | 1 + pandas/core/missing.py | 18 ++++++++++++++---- pandas/tests/test_generic.py | 9 +++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index be62a53bccaf4..13389b603ed6c 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ 
b/doc/source/whatsnew/v0.17.1.txt @@ -25,6 +25,7 @@ Enhancements objects for the ``filepath_or_buffer`` argument. (:issue:`11033`) - ``DataFrame`` now uses the fields of a ``namedtuple`` as columns, if columns are not supplied (:issue:`11181`) - Improve the error message displayed in :func:`pandas.io.gbq.to_gbq` when the DataFrame does not match the schema of the destination table (:issue:`11359`) +- Akima 1D interpolation is now supported (:issue:`7588`) .. _whatsnew_0171.api: diff --git a/pandas/core/missing.py b/pandas/core/missing.py index bc66b6542bcbf..184faf1f0e7c3 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -39,7 +39,7 @@ def _clean_interp_method(method, **kwargs): valid = ['linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial', 'krogh', 'piecewise_polynomial', - 'pchip', 'spline'] + 'pchip', 'spline', 'akima'] if method in ('spline', 'polynomial') and order is None: raise ValueError("You must specify the order of the spline or " "polynomial.") @@ -144,7 +144,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', - 'piecewise_polynomial', 'pchip'] + 'piecewise_polynomial', 'pchip', 'akima'] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 @@ -156,6 +156,8 @@ def _interp_limit(invalid, fw_limit, bw_limit): bounds_error=bounds_error, order=order, **kwargs) result[violate_limit] = np.nan return result + else: + raise ValueError('interpolation method not found') def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, @@ -214,8 +216,16 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, y = y.copy() if not new_x.flags.writeable: new_x = new_x.copy() - method = alt_methods[method] - new_y = method(x, y, new_x, **kwargs) + if method == 'akima': + try: + interpolator = interpolate.Akima1DInterpolator(x, 
y) + except AttributeError: + raise ImportError("Your version of scipy does not support " + "Akima interpolation" ) + new_y = interpolator(new_x) + else: + method = alt_methods[method] + new_y = method(x, y, new_x, **kwargs) return new_y diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index d29673e96ecdd..ab240ea90a3f3 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1167,6 +1167,15 @@ def test_interp_alt_scipy(self): expected.ix[5,'A'] = 6.125 assert_frame_equal(result, expected) + try: + from scipy.interpolate import Akima1DInterpolator + except ImportError: + raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') + result = df.interpolate(method='akima') + expected.ix[2,'A'] = 3 + expected.ix[5,'A'] = 6 + assert_frame_equal(result, expected) + def test_interp_rowwise(self): df = DataFrame({0: [1, 2, np.nan, 4], 1: [2, 3, 4, np.nan],