From f9de80f8b3ddd740f8dc70a5170cc5571f1e70e4 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sat, 13 Feb 2016 01:53:17 +0000
Subject: [PATCH] BUG: Prevent abuse of kwargs in stat functions

Filters kwargs argument in stat functions to prevent the passage of
clearly invalid arguments while at the same time maintaining
compatibility with analogous numpy functions.

Closes gh-12301.
---
 doc/source/whatsnew/v0.18.0.txt | 2 ++
 pandas/core/generic.py | 20 ++++++++++++++++++++
 pandas/tests/test_generic.py | 20 +++++++++++++++++---
 3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 095c48e54c44c..c6d02acf75477 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -824,6 +824,8 @@ Other API Changes
 
 - As part of the new API for :ref:`window functions ` and :ref:`resampling `, aggregation functions have been clarified, raising more informative error messages on invalid aggregations. (:issue:`9052`). A full set of examples are presented in :ref:`groupby `.
 
+- Statistical functions for ``NDFrame`` objects will now raise if non-numpy-compatible arguments are passed in for ``**kwargs`` (:issue:`12301`)
+
 .. _whatsnew_0180.deprecations:
 
 Deprecations
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 5e171e3339d8b..3f7b27eca2b55 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5207,12 +5207,29 @@ def _doc_parms(cls):
 %(outname)s : %(name1)s\n"""
 
 
+def _validate_kwargs(fname, kwargs, *compat_args):
+    """
+    Checks whether parameters passed to the
+    **kwargs argument in a 'stat' function 'fname'
+    are valid parameters as specified in *compat_args
+
+    """
+    list(map(kwargs.__delitem__, filter(
+        kwargs.__contains__, compat_args)))
+    if kwargs:
+        bad_arg = list(kwargs)[0] # first 'key' element
+        raise TypeError(("{fname}() got an unexpected "
+                         "keyword argument '{arg}'".
+                         format(fname=fname, arg=bad_arg)))
+
+
 def _make_stat_function(name, name1, name2, axis_descr, desc, f):
     @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
                   axis_descr=axis_descr)
     @Appender(_num_doc)
     def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
                   **kwargs):
+        _validate_kwargs(name, kwargs, 'out', 'dtype')
         if skipna is None:
             skipna = True
         if axis is None:
@@ -5233,6 +5250,7 @@ def _make_stat_function_ddof(name, name1, name2, axis_descr, desc, f):
     @Appender(_num_ddof_doc)
     def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
                   numeric_only=None, **kwargs):
+        _validate_kwargs(name, kwargs, 'out', 'dtype')
         if skipna is None:
             skipna = True
         if axis is None:
@@ -5254,6 +5272,7 @@ def _make_cum_function(name, name1, name2, axis_descr, desc, accum_func,
     @Appender("Return cumulative {0} over requested axis.".format(name) +
               _cnum_doc)
     def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs):
+        _validate_kwargs(name, kwargs, 'out', 'dtype')
         if axis is None:
             axis = self._stat_axis_number
         else:
@@ -5288,6 +5307,7 @@ def _make_logical_function(name, name1, name2, axis_descr, desc, f):
     @Appender(_bool_doc)
     def logical_func(self, axis=None, bool_only=None, skipna=None, level=None,
                      **kwargs):
+        _validate_kwargs(name, kwargs, 'out', 'dtype')
         if skipna is None:
             skipna = True
         if axis is None:
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index ee83e97de76eb..7983ac7fff834 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -16,12 +16,14 @@
 
 from pandas.compat import range, zip
 from pandas import compat
-from pandas.util.testing import (assert_series_equal,
+from pandas.util.testing import (assertRaisesRegexp,
+                                 assert_series_equal,
                                  assert_frame_equal,
                                  assert_panel_equal,
                                  assert_panel4d_equal,
                                  assert_almost_equal,
                                  assert_equal)
+
 import pandas.util.testing as tm
 
 
@@ -483,8 +485,6 @@ def test_split_compat(self):
         self.assertTrue(len(np.array_split(o, 2)) == 2)
 
     def test_unexpected_keyword(self): # GH8597
-        from pandas.util.testing import assertRaisesRegexp
-
         df = DataFrame(np.random.randn(5, 2), columns=['jim', 'joe'])
         ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
         ts = df['joe'].copy()
@@ -502,6 +502,20 @@ def test_unexpected_keyword(self): # GH8597
         with assertRaisesRegexp(TypeError, 'unexpected keyword'):
             ts.fillna(0, in_place=True)
 
+    # See gh-12301
+    def test_stat_unexpected_keyword(self):
+        obj = self._construct(5)
+        starwars = 'Star Wars'
+
+        with assertRaisesRegexp(TypeError, 'unexpected keyword'):
+            obj.max(epic=starwars) # stat_function
+        with assertRaisesRegexp(TypeError, 'unexpected keyword'):
+            obj.var(epic=starwars) # stat_function_ddof
+        with assertRaisesRegexp(TypeError, 'unexpected keyword'):
+            obj.sum(epic=starwars) # cum_function
+        with assertRaisesRegexp(TypeError, 'unexpected keyword'):
+            obj.any(epic=starwars) # logical_function
+
 
 class TestSeries(tm.TestCase, Generic):
     _typ = Series