From 74fbfcb512d70903908211d71433b7f6d00f3f20 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 13 Mar 2014 09:29:20 -0400 Subject: [PATCH] BUG: Bug in fillna with limit and value specified --- doc/source/release.rst | 2 +- pandas/core/generic.py | 10 +++++++--- pandas/core/internals.py | 20 +++++++++++++++++--- pandas/tests/test_frame.py | 11 +++++++++++ pandas/tests/test_panel.py | 5 +++++ pandas/tests/test_series.py | 10 ++++++++++ 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 6f83bea6a6578..4890f22e98468 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -232,7 +232,7 @@ Bug Fixes - Bug in sql writing with mixed dtypes possibly leading to data loss (:issue:`6509`) - Bug in popping from a Series (:issue:`6600`) - Bug in ``iloc`` indexing when positional indexer matched Int64Index of corresponding axis no reordering happened (:issue:`6612`) - +- Bug in ``fillna`` with ``limit`` and ``value`` specified pandas 0.13.1 ------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 678578a19e221..0a8f57c581d92 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2162,7 +2162,9 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, from pandas import Series value = Series(value) - new_data = self._data.fillna(value=value, inplace=inplace, + new_data = self._data.fillna(value=value, + limit=limit, + inplace=inplace, downcast=downcast) elif isinstance(value, (dict, com.ABCSeries)): @@ -2176,10 +2178,12 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, if k not in result: continue obj = result[k] - obj.fillna(v, inplace=True) + obj.fillna(v, limit=limit, inplace=True) return result else: - new_data = self._data.fillna(value=value, inplace=inplace, + new_data = self._data.fillna(value=value, + limit=limit, + inplace=inplace, downcast=downcast) if inplace: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index a5b9e874cea41..fc7b4bc23ac09 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -367,7 +367,7 @@ def apply(self, func, **kwargs): """ apply the function to my values; return a block if we are not one """ return self.as_block(func(self.values)) - def fillna(self, value, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace=False, downcast=None): if not self._can_hold_na: if inplace: return [self] @@ -375,6 +375,11 @@ def fillna(self, value, inplace=False, downcast=None): return [self.copy()] mask = com.isnull(self.values) + if limit is not None: + if self.ndim > 2: + raise NotImplementedError + mask[mask.cumsum(self.ndim-1)>limit]=False + value = self._try_fill(value) blocks = self.putmask(mask, value, inplace=inplace) return self._maybe_downcast(blocks, downcast) @@ -1680,11 +1685,18 @@ def _try_fill(self, value): value = tslib.iNaT return value - def fillna(self, value, inplace=False, downcast=None): + def fillna(self, value, limit=None, + inplace=False, downcast=None): + # straight putmask here values = self.values if inplace else self.values.copy() mask = com.isnull(self.values) value = self._try_fill(value) + if limit is not None: + if self.ndim > 2: + raise NotImplementedError + mask[mask.cumsum(self.ndim-1)>limit]=False + np.putmask(values, mask, value) return [self if inplace else make_block(values, self.items, self.ref_items, fastpath=True)] @@ -1889,8 +1901,10 @@ def interpolate(self, method='pad', axis=0, inplace=False, self.values.to_dense(), method, axis, limit, fill_value) return self.make_block(values, self.items, self.ref_items) - def fillna(self, value, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace=False, downcast=None): # we may need to upcast our fill to match our dtype + if limit is not None: + raise NotImplementedError if issubclass(self.dtype.type, np.floating): value = float(value) values = self.values if inplace else self.values.copy() diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 4758670517df0..ed88a355cf7a9 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7113,6 +7113,17 @@ def test_fillna(self): df.fillna({ 2: 'foo' }, inplace=True) assert_frame_equal(df, expected) + # limit and value + df = DataFrame(np.random.randn(10,3)) + df.iloc[2:7,0] = np.nan + df.iloc[3:5,2] = np.nan + + expected = df.copy() + expected.iloc[2,0] = 999 + expected.iloc[3,2] = 999 + result = df.fillna(999,limit=1) + assert_frame_equal(result, expected) + def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df = DataFrame(index=["A","B","C"], columns = [1,2,3,4,5]) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 3f6e4c6f3288c..b2721689f574d 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1322,6 +1322,11 @@ def test_fillna(self): self.assertRaises(TypeError, self.panel.fillna, [1, 2]) self.assertRaises(TypeError, self.panel.fillna, (1, 2)) + # limit not implemented when only value is specified + p = Panel(np.random.randn(3,4,5)) + p.iloc[0:2,0:2,0:2] = np.nan + self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) + def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), self.panel.fillna(method='ffill')) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 93fa739f7f218..b90cdcf55f636 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2941,6 +2941,16 @@ def test_fillna(self): expected = Series([0,0,2.], list('bac')) assert_series_equal(result,expected) + # limit + s = Series(np.nan,index=[0,1,2]) + result = s.fillna(999,limit=1) + expected = Series([999,np.nan,np.nan],index=[0,1,2]) + assert_series_equal(result,expected) + + result = s.fillna(999,limit=2) + expected = Series([999,999,np.nan],index=[0,1,2]) + assert_series_equal(result,expected) + def test_fillna_bug(self): x = Series([nan, 1., nan, 3., nan], ['z', 'a', 'b', 'c', 'd']) filled = x.fillna(method='ffill')