From 6f041e68c057a94386fbd9ed8334e249556aaa2e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 26 Dec 2016 14:35:03 -0800 Subject: [PATCH 1/2] Bug: Raise ValueError with interpolate limit = 0 add whatsnew Add similar log to fillna Update whatsnew with issue and fillna change Add test for a negative limit change if statements to assert move limit check closer to where the variable is used Changes assertion to ValueError --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 6 +++--- pandas/core/internals.py | 3 +++ pandas/core/missing.py | 7 +++++-- pandas/tests/series/test_missing.py | 10 ++++++++++ 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6fe066b08e255..bb0d6003c28b7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -418,6 +418,7 @@ Other API Changes - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) - ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`) +- ``interpolate()`` and ``fillna()`` will raise a ``ValueError`` if the ``limit`` keyword argument is not greater than 0. (:issue:`9217`) - ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`) - ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than than one byte (:issue:`11592`) - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 228dd2acd2124..20e6e027dbf09 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3262,7 +3262,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, a gap with more than this number of consecutive NaNs, it will only be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be - filled. + filled. Must be greater than 0 if not None. downcast : dict, default is None a dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate @@ -3281,6 +3281,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): inplace = validate_bool_kwarg(inplace, 'inplace') + if isinstance(value, (list, tuple)): raise TypeError('"value" parameter must be a scalar or dict, but ' 'you passed a "{0}"'.format(type(value).__name__)) @@ -3292,7 +3293,6 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, axis = 0 axis = self._get_axis_number(axis) method = missing.clean_fill_method(method) - from pandas import DataFrame if value is None: if method is None: @@ -3687,7 +3687,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, * 0: fill column-by-column * 1: fill row-by-row limit : int, default None. - Maximum number of consecutive NaNs to fill. + Maximum number of consecutive NaNs to fill. Must be greater than 0. limit_direction : {'forward', 'backward', 'both'}, default 'forward' If limit is specified, consecutive NaNs will be filled in this direction. diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f0b1516d786c6..6f7feb5a6633c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -372,6 +372,9 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, original_value = value mask = isnull(self.values) if limit is not None: + if is_integer(limit) and limit < 1: + raise ValueError('`limit` keyword argument must be greater ' + 'than 0.') if self.ndim > 2: raise NotImplementedError("number of dimensions for 'fillna' " "is currently limited to 2") diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e83a0518d97f6..2cdf543ed9b6e 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -12,7 +12,7 @@ is_float_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_integer_dtype, _ensure_float64, is_scalar, - needs_i8_conversion) + needs_i8_conversion, is_integer) from pandas.types.missing import isnull @@ -169,7 +169,10 @@ def _interp_limit(invalid, fw_limit, bw_limit): # the beginning (see issues #9218 and #10420) violate_limit = sorted(start_nans) - if limit: + if limit is not None: + if is_integer(limit) and limit < 1: + raise ValueError('`limit` keyword argument must be greater ' + 'than 0.') if limit_direction == 'forward': violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 702fa2acb5106..22e325ef3b05b 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -295,6 +295,11 @@ def test_fillna_raise(self): self.assertRaises(TypeError, s.fillna, [1, 2]) self.assertRaises(TypeError, s.fillna, (1, 2)) + # related GH 9217, make sure limit is greater than 0 + for limit in [-1, 0]: + s = Series([1, 2, 3, None]) + tm.assertRaises(ValueError, lambda: s.fillna(1, limit=limit)) + def test_fillna_nat(self): series = Series([0, 1, 2, tslib.iNaT], dtype='M8[ns]') @@ -865,6 +870,11 @@ def test_interp_limit(self): result = s.interpolate(method='linear', limit=2) assert_series_equal(result, expected) + # GH 9217 + for limit in [-1, 0]: + s = pd.Series([1, 2, np.nan, np.nan, 5]) + tm.assertRaises(ValueError, lambda: s.interpolate(limit=limit)) + def test_interp_limit_forward(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From c1790eeb3f5f65f1f40e489a7c2d075eb6cf1dc3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 9 Feb 2017 22:16:46 -0800 Subject: [PATCH 2/2] Unify ValueError message and correct cython limits Add stricter integer check with tests --- pandas/core/internals.py | 7 +++--- pandas/core/missing.py | 7 +++--- pandas/src/algos_common_helper.pxi.in | 36 ++++++++++++++++++--------- pandas/tests/series/test_missing.py | 24 ++++++++++++------ 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6f7feb5a6633c..6cd5eceed5f2a 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -372,9 +372,10 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, original_value = value mask = isnull(self.values) if limit is not None: - if is_integer(limit) and limit < 1: - raise ValueError('`limit` keyword argument must be greater ' - 'than 0.') + if not is_integer(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') if self.ndim > 2: raise NotImplementedError("number of dimensions for 'fillna' " "is currently limited to 2") diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 2cdf543ed9b6e..ffd0423572f5e 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -170,9 +170,10 @@ def _interp_limit(invalid, fw_limit, bw_limit): violate_limit = sorted(start_nans) if limit is not None: - if is_integer(limit) and limit < 1: - raise ValueError('`limit` keyword argument must be greater ' - 'than 0.') + if not is_integer(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') if limit_direction == 'forward': violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/src/algos_common_helper.pxi.in index 5e87528943005..42089f9520ab6 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/src/algos_common_helper.pxi.in @@ -83,8 +83,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, if limit is None: lim = nright else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: @@ -146,8 +148,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit val = values[0] @@ -180,8 +184,10 @@ def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit for j in range(K): @@ -240,8 +246,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, if limit is None: lim = nright else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: @@ -304,8 +312,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit val = values[N - 1] @@ -338,8 +348,10 @@ def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit for j in range(K): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 22e325ef3b05b..aae14905d375b 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -295,10 +295,12 @@ def test_fillna_raise(self): self.assertRaises(TypeError, s.fillna, [1, 2]) self.assertRaises(TypeError, s.fillna, (1, 2)) - # related GH 9217, make sure limit is greater than 0 - for limit in [-1, 0]: - s = Series([1, 2, 3, None]) - tm.assertRaises(ValueError, lambda: s.fillna(1, limit=limit)) + # related GH 9217, make sure limit is an int and greater than 0 + s = Series([1, 2, 3, None]) + for limit in [-1, 0, 1., 2.]: + for method in ['backfill', 'bfill', 'pad', 'ffill', None]: + with tm.assertRaises(ValueError): + s.fillna(1, limit=limit, method=method) def test_fillna_nat(self): series = Series([0, 1, 2, tslib.iNaT], dtype='M8[ns]') @@ -870,10 +872,16 @@ def test_interp_limit(self): result = s.interpolate(method='linear', limit=2) assert_series_equal(result, expected) - # GH 9217 - for limit in [-1, 0]: - s = pd.Series([1, 2, np.nan, np.nan, 5]) - tm.assertRaises(ValueError, lambda: s.interpolate(limit=limit)) + # GH 9217, make sure limit is an int and greater than 0 + methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero', + 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', + 'polynomial', 'spline', 'piecewise_polynomial', None, + 'from_derivatives', 'pchip', 'akima'] + s = pd.Series([1, 2, np.nan, np.nan, 5]) + for limit in [-1, 0, 1., 2.]: + for method in methods: + with tm.assertRaises(ValueError): + s.interpolate(limit=limit, method=method) def test_interp_limit_forward(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11])