From ed8145b925e92bad5a90e9f2c69863552f30e90f Mon Sep 17 00:00:00 2001 From: mcjcode Date: Tue, 9 Sep 2014 01:09:35 -0400 Subject: [PATCH 1/6] Explicitly create class definitions of whitelisted SeriesGroupBy and DataFrameGroupBy methods --- doc/source/api.rst | 104 +++++++++++++++++++++++++ pandas/core/groupby.py | 96 ++++++++++++++++++++++- pandas/tests/test_groupby.py | 4 + pandas/tests/test_groupby_whitelist.py | 63 +++++++++++++++ 4 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/test_groupby_whitelist.py diff --git a/doc/source/api.rst b/doc/source/api.rst index 62518bf0d9ffd..42d57debf9a66 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1313,6 +1313,110 @@ Computations / Descriptive Stats GroupBy.std GroupBy.var GroupBy.ohlc + GroupBy.nth + GroupBy.prod + GroupBy.size + +DataFrameGroupBy +---------------- +.. currentmodule:: pandas.core.groupby + +A DataFrameGroupBy object is returned by :func:`pandas.DataFrame.groupby`. + +Computations / Descriptive Stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: generated/ + + DataFrameGroupBy.all + DataFrameGroupBy.any + DataFrameGroupBy.bfill + DataFrameGroupBy.boxplot + DataFrameGroupBy.corr + DataFrameGroupBy.corrwith + DataFrameGroupBy.count + DataFrameGroupBy.cov + DataFrameGroupBy.cumcount + DataFrameGroupBy.cummax + DataFrameGroupBy.cummin + DataFrameGroupBy.cumprod + DataFrameGroupBy.cumsum + DataFrameGroupBy.describe + DataFrameGroupBy.diff + DataFrameGroupBy.ffill + DataFrameGroupBy.fillna + DataFrameGroupBy.first + DataFrameGroupBy.head + DataFrameGroupBy.hist + DataFrameGroupBy.idxmax + DataFrameGroupBy.idxmin + DataFrameGroupBy.irow + DataFrameGroupBy.last + DataFrameGroupBy.mad + DataFrameGroupBy.max + DataFrameGroupBy.min + DataFrameGroupBy.pct_change + DataFrameGroupBy.plot + DataFrameGroupBy.quantile + DataFrameGroupBy.rank + DataFrameGroupBy.resample + DataFrameGroupBy.shift + DataFrameGroupBy.skew + DataFrameGroupBy.sum + DataFrameGroupBy.tail + DataFrameGroupBy.take + DataFrameGroupBy.tshift + +SeriesGroupBy +------------- + +A SeriesGroupBy object is returned by :func:`pandas.Series.groupby`. + +Computations / Descriptive Stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: generated/ + + SeriesGroupBy.all + SeriesGroupBy.any + SeriesGroupBy.bfill + SeriesGroupBy.corr + SeriesGroupBy.count + SeriesGroupBy.cov + SeriesGroupBy.cumcount + SeriesGroupBy.cummax + SeriesGroupBy.cummin + SeriesGroupBy.cumprod + SeriesGroupBy.cumsum + SeriesGroupBy.describe + SeriesGroupBy.diff + SeriesGroupBy.dtype + SeriesGroupBy.ffill + SeriesGroupBy.fillna + SeriesGroupBy.first + SeriesGroupBy.head + SeriesGroupBy.hist + SeriesGroupBy.idxmax + SeriesGroupBy.idxmin + SeriesGroupBy.irow + SeriesGroupBy.last + SeriesGroupBy.mad + SeriesGroupBy.max + SeriesGroupBy.min + SeriesGroupBy.nunique + SeriesGroupBy.pct_change + SeriesGroupBy.plot + SeriesGroupBy.quantile + SeriesGroupBy.rank + SeriesGroupBy.resample + SeriesGroupBy.shift + SeriesGroupBy.skew + SeriesGroupBy.sum + SeriesGroupBy.tail + SeriesGroupBy.take + SeriesGroupBy.tshift + SeriesGroupBy.unique + SeriesGroupBy.value_counts .. currentmodule:: pandas diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 212e5086ee543..735f06ada8763 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -516,7 +516,7 @@ def wrapper(*args, **kwargs): # a little trickery for aggregation functions that need an axis # argument kwargs_with_axis = kwargs.copy() - if 'axis' not in kwargs_with_axis: + if 'axis' not in kwargs_with_axis or kwargs_with_axis['axis']==None: kwargs_with_axis['axis'] = self.axis def curried_with_axis(x): @@ -2087,9 +2087,99 @@ def _convert_grouper(axis, grouper): else: return grouper +from inspect import getargspec +def _make_signature(func) : + """ + Returns a string repr of the arg list of a func call, with any defaults + + Examples + -------- + + >>> def f(a,b,c=2) : + >>> return a*b*c + >>> print(_make_signature(f)) + a,b,c=2 + """ + spec = getargspec(func) + if spec.defaults == None : + n_wo_defaults = len(spec.args) + defaults = ('',) * n_wo_defaults + else : + n_wo_defaults = len(spec.args) - (len(spec.defaults) if spec.defaults != None 
else 0) + defaults = ('',) * n_wo_defaults + spec.defaults + args = [] + for i, (var, default) in enumerate(zip(spec.args, defaults)) : + args.append(var if default=='' else var+'='+repr(default)) + return args, spec.args + +def _whitelist_method_generator(klass, whitelist) : + """ + Yields all GroupBy member defs for DataFrame/Series names in _whitelist. + + Parameters + ---------- + klass - class where members are defined. Should be Series or DataFrame + + whitelist - list of names of klass methods to be constructed + + Returns + ------- + The generator yields a sequence of strings, each suitable for exec'ing, + that define implementations of the named methods for DataFrameGroupBy + or SeriesGroupBy. + + Since we don't want to override methods explicitly defined in the + base class, any such name is skipped. + """ + + method_wrapper_template = \ + """def %(name)s(%(sig)s) : + \""" + %(doc)s + \""" + f = %(self)s.__getattr__('%(name)s') + return f(%(args)s) + """ + property_wrapper_template = \ + """@property +def %(name)s(self) : + \""" + %(doc)s + \""" + return self.__getattr__('%(name)s') + """ + for name in whitelist : + # don't override anything that was explicitly defined + # in the base class + if hasattr(GroupBy,name) : + continue + # ugly, but we need the name string itself in the method. + f = getattr(klass,name) + doc = f.__doc__ + doc = doc if type(doc)==str else '' + if type(f) == types.MethodType : + wrapper_template = method_wrapper_template + decl, args = _make_signature(f) + # pass args by name to f because otherwise + # GroupBy._make_wrapper won't know whether + # we passed in an axis parameter. 
+ args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]] + params = {'name':name, + 'doc':doc, + 'sig':','.join(decl), + 'self':args[0], + 'args':','.join(args_by_name)} + else : + wrapper_template = property_wrapper_template + params = {'name':name, 'doc':doc} + yield wrapper_template % params class SeriesGroupBy(GroupBy): + # + # Make class defs of attributes on SeriesGroupBy whitelist _apply_whitelist = _series_apply_whitelist + for _def_str in _whitelist_method_generator(Series,_series_apply_whitelist) : + exec(_def_str) def aggregate(self, func_or_funcs, *args, **kwargs): """ @@ -2977,6 +3067,10 @@ def filter(self, func, dropna=True, *args, **kwargs): class DataFrameGroupBy(NDFrameGroupBy): _apply_whitelist = _dataframe_apply_whitelist + # + # Make class defs of attributes on DataFrameGroupBy whitelist. + for _def_str in _whitelist_method_generator(DataFrame,_apply_whitelist) : + exec(_def_str) _block_agg_axis = 1 diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 8e9503b4fe1a3..6b42a69a3dafc 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -4154,6 +4154,10 @@ def test_groupby_whitelist(self): self.assertEqual(whitelist, gb._apply_whitelist) for m in whitelist: getattr(gb, m) + # Also make sure that the class itself has + # the method defined (dtypes is not a method) + if m not in ['dtypes'] : + self.assertTrue(hasattr(type(gb), m)) def test_groupby_blacklist(self): from string import ascii_lowercase diff --git a/pandas/tests/test_groupby_whitelist.py b/pandas/tests/test_groupby_whitelist.py new file mode 100644 index 0000000000000..09806db9432ab --- /dev/null +++ b/pandas/tests/test_groupby_whitelist.py @@ -0,0 +1,63 @@ +# pylint: disable-msg=W0612,E1101,W0141 +import datetime +import nose + +from numpy.random import randn +import numpy as np + +from pandas.core.index import Index, MultiIndex +from pandas import Panel, DataFrame, Series, notnull, isnull + +from pandas.util.testing import 
(assert_almost_equal, + assert_series_equal, + assert_frame_equal, + assertRaisesRegexp) +import pandas.core.common as com +import pandas.util.testing as tm +from pandas.compat import (range, lrange, StringIO, lzip, u, + product as cart_product, zip) +import pandas as pd + +import pandas.index as _index + + +class TestNewGroupByAttr(tm.TestCase): + + _multiprocess_can_split_ = True + + def setUp(self): + import warnings + warnings.filterwarnings(action='ignore', category=FutureWarning) + + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + self.frame = DataFrame(np.random.randn(10, 3), index=index, + columns=Index(['A', 'B', 'C'], name='exp')) + + self.frame.ix[1, [1, 2]] = np.nan + self.frame.ix[7, [0, 1]] = np.nan + + AGG_FUNCTIONS = ['skew', 'mad'] + + def test_newattr(self) : + for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, + lrange(2), lrange(2), + [True,False]) : + if axis == 0 : + frame = self.frame + else : + frame = self.frame.T + + grouped = frame.groupby(level=level,axis=axis) + result = getattr(grouped,op)(skipna=skipna) + expected = getattr(frame,op)(level=level,axis=axis,skipna=skipna) + assert_frame_equal(result, expected) + +if __name__ == '__main__': + + import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) From a77bc2d5b10bb86d12613f53f72221f43ecf4a44 Mon Sep 17 00:00:00 2001 From: mcjcode Date: Wed, 10 Sep 2014 22:33:02 -0400 Subject: [PATCH 2/6] Moved new test into test_groupby.py. 
Wrapped execs in try/except blocks to show offending code string --- pandas/core/groupby.py | 14 ++++-- pandas/tests/test_groupby.py | 28 +++++++++++- pandas/tests/test_groupby_whitelist.py | 63 -------------------------- 3 files changed, 38 insertions(+), 67 deletions(-) delete mode 100644 pandas/tests/test_groupby_whitelist.py diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 6559fcf877c6f..38603e769fa27 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2224,8 +2224,12 @@ class SeriesGroupBy(GroupBy): # Make class defs of attributes on SeriesGroupBy whitelist _apply_whitelist = _series_apply_whitelist for _def_str in _whitelist_method_generator(Series,_series_apply_whitelist) : - exec(_def_str) - + try : + exec(_def_str) + except SyntaxError as e : + print(_def_str) + raise e + def aggregate(self, func_or_funcs, *args, **kwargs): """ Apply aggregation function or functions to groups, yielding most likely @@ -3138,7 +3142,11 @@ class DataFrameGroupBy(NDFrameGroupBy): # # Make class defs of attributes on DataFrameGroupBy whitelist. 
for _def_str in _whitelist_method_generator(DataFrame,_apply_whitelist) : - exec(_def_str) + try : + exec(_def_str) + except SyntaxError as e : + print(_def_str) + raise e _block_agg_axis = 1 diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index f25d3d104157b..39bc5e52246c5 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -19,7 +19,7 @@ assert_index_equal, assertRaisesRegexp) from pandas.compat import( range, long, lrange, StringIO, lmap, lzip, map, - zip, builtins, OrderedDict + zip, builtins, OrderedDict, product as cart_product ) from pandas import compat from pandas.core.panel import Panel @@ -4315,6 +4315,32 @@ def test_groupby_whitelist(self): if m not in ['dtypes'] : self.assertTrue(hasattr(type(gb), m)) + def test_regression_whitelist_methods(self) : + + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + raw_frame = DataFrame(np.random.randn(10, 3), index=index, + columns=Index(['A', 'B', 'C'], name='exp')) + raw_frame.ix[1, [1, 2]] = np.nan + raw_frame.ix[7, [0, 1]] = np.nan + + for op, level, axis, skipna in cart_product(['skew', 'mad'], + lrange(2), lrange(2), + [True,False]) : + + if axis == 0 : + frame = raw_frame + else : + frame = raw_frame.T + + grouped = frame.groupby(level=level,axis=axis) + result = getattr(grouped,op)(skipna=skipna) + expected = getattr(frame,op)(level=level,axis=axis,skipna=skipna) + assert_frame_equal(result, expected) + def test_groupby_blacklist(self): from string import ascii_lowercase letters = np.array(list(ascii_lowercase)) diff --git a/pandas/tests/test_groupby_whitelist.py b/pandas/tests/test_groupby_whitelist.py deleted file mode 100644 index 09806db9432ab..0000000000000 --- a/pandas/tests/test_groupby_whitelist.py +++ /dev/null @@ -1,63 +0,0 @@ -# pylint: disable-msg=W0612,E1101,W0141 -import datetime -import nose - -from 
numpy.random import randn -import numpy as np - -from pandas.core.index import Index, MultiIndex -from pandas import Panel, DataFrame, Series, notnull, isnull - -from pandas.util.testing import (assert_almost_equal, - assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) -import pandas.core.common as com -import pandas.util.testing as tm -from pandas.compat import (range, lrange, StringIO, lzip, u, - product as cart_product, zip) -import pandas as pd - -import pandas.index as _index - - -class TestNewGroupByAttr(tm.TestCase): - - _multiprocess_can_split_ = True - - def setUp(self): - import warnings - warnings.filterwarnings(action='ignore', category=FutureWarning) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.frame = DataFrame(np.random.randn(10, 3), index=index, - columns=Index(['A', 'B', 'C'], name='exp')) - - self.frame.ix[1, [1, 2]] = np.nan - self.frame.ix[7, [0, 1]] = np.nan - - AGG_FUNCTIONS = ['skew', 'mad'] - - def test_newattr(self) : - for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - lrange(2), lrange(2), - [True,False]) : - if axis == 0 : - frame = self.frame - else : - frame = self.frame.T - - grouped = frame.groupby(level=level,axis=axis) - result = getattr(grouped,op)(skipna=skipna) - expected = getattr(frame,op)(level=level,axis=axis,skipna=skipna) - assert_frame_equal(result, expected) - -if __name__ == '__main__': - - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) From 70ecbaa7c5c044efdc2cff894d5758be81274566 Mon Sep 17 00:00:00 2001 From: mcjcode Date: Wed, 10 Sep 2014 23:12:00 -0400 Subject: [PATCH 3/6] Removed trailing 4-spaces in exec'ed strings --- pandas/core/groupby.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 
38603e769fa27..25606ab0cbaaf 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2183,16 +2183,14 @@ def _whitelist_method_generator(klass, whitelist) : %(doc)s \""" f = %(self)s.__getattr__('%(name)s') - return f(%(args)s) - """ + return f(%(args)s)""" property_wrapper_template = \ """@property def %(name)s(self) : \""" %(doc)s \""" - return self.__getattr__('%(name)s') - """ + return self.__getattr__('%(name)s')""" for name in whitelist : # don't override anything that was explicitly defined # in the base class @@ -2227,7 +2225,10 @@ class SeriesGroupBy(GroupBy): try : exec(_def_str) except SyntaxError as e : + print('-'*80) print(_def_str) + print('-'*80) + print(e) raise e def aggregate(self, func_or_funcs, *args, **kwargs): @@ -3145,7 +3146,9 @@ class DataFrameGroupBy(NDFrameGroupBy): try : exec(_def_str) except SyntaxError as e : + print('-'*80) print(_def_str) + print('-'*80) raise e _block_agg_axis = 1 From 1b4536b37505a6b60356d8681944db17218a7559 Mon Sep 17 00:00:00 2001 From: mcjcode Date: Thu, 11 Sep 2014 22:21:32 -0400 Subject: [PATCH 4/6] Put all Computation/Descriptive aggregating functions under GroupBy --- doc/source/api.rst | 152 +++++++++++++++++---------------------------- 1 file changed, 58 insertions(+), 94 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 18c90e130d59f..2d94042ff8df7 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1328,116 +1328,80 @@ Computations / Descriptive Stats .. autosummary:: :toctree: generated/ + GroupBy.count + GroupBy.cumcount + GroupBy.first + GroupBy.head + GroupBy.last + GroupBy.max GroupBy.mean GroupBy.median + GroupBy.min + GroupBy.nth + GroupBy.ohlc + GroupBy.prod + GroupBy.size GroupBy.sem GroupBy.std + GroupBy.sum GroupBy.var - GroupBy.ohlc - GroupBy.nth - GroupBy.prod - GroupBy.size + GroupBy.tail -DataFrameGroupBy ----------------- -.. 
currentmodule:: pandas.core.groupby +The following methods are available in both ``SeriesGroupBy`` and +``DataFrameGroupBy`` objects, but may differ slightly, usually in that +the ``DataFrameGroupBy`` version permits the specification of an +axis argument, and often an argument indicating whether to restrict +application to columns of a specific data type. -A DataFrameGroupBy object is returned by :func:`pandas.DataFrame.groupby`. +.. autosummary:: + :toctree: generated/ + + DataFrameGroupBy.bfill + DataFrameGroupBy.cummax + DataFrameGroupBy.cummin + DataFrameGroupBy.cumprod + DataFrameGroupBy.cumsum + DataFrameGroupBy.describe + DataFrameGroupBy.all + DataFrameGroupBy.any + DataFrameGroupBy.corr + DataFrameGroupBy.cov + DataFrameGroupBy.diff + DataFrameGroupBy.ffill + DataFrameGroupBy.fillna + DataFrameGroupBy.hist + DataFrameGroupBy.idxmax + DataFrameGroupBy.idxmin + DataFrameGroupBy.irow + DataFrameGroupBy.mad + DataFrameGroupBy.pct_change + DataFrameGroupBy.plot + DataFrameGroupBy.quantile + DataFrameGroupBy.rank + DataFrameGroupBy.resample + DataFrameGroupBy.shift + DataFrameGroupBy.skew + DataFrameGroupBy.take + DataFrameGroupBy.tshift + +The following methods are available only for ``SeriesGroupBy`` objects. -Computations / Descriptive Stats -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autosummary:: :toctree: generated/ - DataFrameGroupBy.all - DataFrameGroupBy.any - DataFrameGroupBy.bfill - DataFrameGroupBy.boxplot - DataFrameGroupBy.corr - DataFrameGroupBy.corrwith - DataFrameGroupBy.count - DataFrameGroupBy.cov - DataFrameGroupBy.cumcount - DataFrameGroupBy.cummax - DataFrameGroupBy.cummin - DataFrameGroupBy.cumprod - DataFrameGroupBy.cumsum - DataFrameGroupBy.describe - DataFrameGroupBy.diff - DataFrameGroupBy.ffill - DataFrameGroupBy.fillna - DataFrameGroupBy.first - DataFrameGroupBy.head - DataFrameGroupBy.hist - DataFrameGroupBy.idxmax - DataFrameGroupBy.idxmin - DataFrameGroupBy.irow - DataFrameGroupBy.last - DataFrameGroupBy.mad - DataFrameGroupBy.max - DataFrameGroupBy.min - DataFrameGroupBy.pct_change - DataFrameGroupBy.plot - DataFrameGroupBy.quantile - DataFrameGroupBy.rank - DataFrameGroupBy.resample - DataFrameGroupBy.shift - DataFrameGroupBy.skew - DataFrameGroupBy.sum - DataFrameGroupBy.tail - DataFrameGroupBy.take - DataFrameGroupBy.tshift - -SeriesGroupBy -------------- + SeriesGroupBy.nlargest + SeriesGroupBy.nsmallest + SeriesGroupBy.nunique + SeriesGroupBy.unique + SeriesGroupBy.value_counts -A SeriesGroupBy object is returned by :func:`pandas.Series.groupby`. +The following methods are available only for ``DataFrameGroupBy`` objects. -Computations / Descriptive Stats -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autosummary:: :toctree: generated/ - SeriesGroupBy.all - SeriesGroupBy.any - SeriesGroupBy.bfill - SeriesGroupBy.corr - SeriesGroupBy.count - SeriesGroupBy.cov - SeriesGroupBy.cumcount - SeriesGroupBy.cummax - SeriesGroupBy.cummin - SeriesGroupBy.cumprod - SeriesGroupBy.cumsum - SeriesGroupBy.describe - SeriesGroupBy.diff - SeriesGroupBy.dtype - SeriesGroupBy.ffill - SeriesGroupBy.fillna - SeriesGroupBy.first - SeriesGroupBy.head - SeriesGroupBy.hist - SeriesGroupBy.idxmax - SeriesGroupBy.idxmin - SeriesGroupBy.irow - SeriesGroupBy.last - SeriesGroupBy.mad - SeriesGroupBy.max - SeriesGroupBy.min - SeriesGroupBy.nunique - SeriesGroupBy.pct_change - SeriesGroupBy.plot - SeriesGroupBy.quantile - SeriesGroupBy.rank - SeriesGroupBy.resample - SeriesGroupBy.shift - SeriesGroupBy.skew - SeriesGroupBy.sum - SeriesGroupBy.tail - SeriesGroupBy.take - SeriesGroupBy.tshift - SeriesGroupBy.unique - SeriesGroupBy.value_counts + DataFrameGroupBy.corrwith + DataFrameGroupBy.boxplot .. currentmodule:: pandas From 6d31eb78bef48b44e931be24233e444d8c38108a Mon Sep 17 00:00:00 2001 From: mcjcode Date: Fri, 12 Sep 2014 21:10:37 -0400 Subject: [PATCH 5/6] cleaned up whitelist test, put inspect import inside _make_signature definition --- pandas/core/groupby.py | 2 +- pandas/tests/test_groupby.py | 26 ++++++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ab69e87f485da..d44ca32a38ab4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2133,7 +2133,6 @@ def _convert_grouper(axis, grouper): else: return grouper -from inspect import getargspec def _make_signature(func) : """ Returns a string repr of the arg list of a func call, with any defaults @@ -2146,6 +2145,7 @@ def _make_signature(func) : >>> print(_make_signature(f)) a,b,c=2 """ + from inspect import getargspec spec = getargspec(func) if spec.defaults == None : n_wo_defaults = len(spec.args) diff --git 
a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index e4f30444b9f4d..ab6363b705aa5 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -4327,12 +4327,12 @@ def test_groupby_whitelist(self): gb = obj.groupby(df.letters) self.assertEqual(whitelist, gb._apply_whitelist) for m in whitelist: - getattr(gb, m) - # Also make sure that the class itself has - # the method defined (dtypes is not a method) - if m not in ['dtypes'] : - self.assertTrue(hasattr(type(gb), m)) + getattr(type(gb), m) + AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', + 'mad', 'std', 'var', 'sem'] + AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] + def test_regression_whitelist_methods(self) : index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], @@ -4345,7 +4345,7 @@ def test_regression_whitelist_methods(self) : raw_frame.ix[1, [1, 2]] = np.nan raw_frame.ix[7, [0, 1]] = np.nan - for op, level, axis, skipna in cart_product(['skew', 'mad'], + for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), lrange(2), [True,False]) : @@ -4354,10 +4354,16 @@ def test_regression_whitelist_methods(self) : else : frame = raw_frame.T - grouped = frame.groupby(level=level,axis=axis) - result = getattr(grouped,op)(skipna=skipna) - expected = getattr(frame,op)(level=level,axis=axis,skipna=skipna) - assert_frame_equal(result, expected) + if op in self.AGG_FUNCTIONS_WITH_SKIPNA : + grouped = frame.groupby(level=level,axis=axis) + result = getattr(grouped,op)(skipna=skipna) + expected = getattr(frame,op)(level=level,axis=axis,skipna=skipna) + assert_frame_equal(result, expected) + else : + grouped = frame.groupby(level=level,axis=axis) + result = getattr(grouped,op)() + expected = getattr(frame,op)(level=level,axis=axis) + assert_frame_equal(result, expected) def test_groupby_blacklist(self): from string import ascii_lowercase From 295120b31d94f0f73f5e06929faa1c83e2f5d651 Mon Sep 17 00:00:00 2001 From: mcjcode Date: Sat, 13 Sep 2014 
21:39:41 -0400 Subject: [PATCH 6/6] Moved make_signature method out to decorators.py --- pandas/core/groupby.py | 46 ++++----------------------------------- pandas/util/decorators.py | 26 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 42 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index d44ca32a38ab4..02e36c30e962b 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -18,7 +18,7 @@ from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series from pandas.core.panel import Panel -from pandas.util.decorators import cache_readonly, Appender +from pandas.util.decorators import cache_readonly, Appender, make_signature import pandas.core.algorithms as algos import pandas.core.common as com from pandas.core.common import(_possibly_downcast_to_dtype, isnull, @@ -2133,31 +2133,6 @@ def _convert_grouper(axis, grouper): else: return grouper -def _make_signature(func) : - """ - Returns a string repr of the arg list of a func call, with any defaults - - Examples - -------- - - >>> def f(a,b,c=2) : - >>> return a*b*c - >>> print(_make_signature(f)) - a,b,c=2 - """ - from inspect import getargspec - spec = getargspec(func) - if spec.defaults == None : - n_wo_defaults = len(spec.args) - defaults = ('',) * n_wo_defaults - else : - n_wo_defaults = len(spec.args) - len(spec.defaults) - defaults = ('',) * n_wo_defaults + spec.defaults - args = [] - for i, (var, default) in enumerate(zip(spec.args, defaults)) : - args.append(var if default=='' else var+'='+repr(default)) - return args, spec.args - def _whitelist_method_generator(klass, whitelist) : """ Yields all GroupBy member defs for DataFrame/Series names in _whitelist. 
@@ -2203,7 +2178,7 @@ def %(name)s(self) : doc = doc if type(doc)==str else '' if type(f) == types.MethodType : wrapper_template = method_wrapper_template - decl, args = _make_signature(f) + decl, args = make_signature(f) # pass args by name to f because otherwise # GroupBy._make_wrapper won't know whether # we passed in an axis parameter. @@ -2223,14 +2198,7 @@ class SeriesGroupBy(GroupBy): # Make class defs of attributes on SeriesGroupBy whitelist _apply_whitelist = _series_apply_whitelist for _def_str in _whitelist_method_generator(Series,_series_apply_whitelist) : - try : - exec(_def_str) - except SyntaxError as e : - print('-'*80) - print(_def_str) - print('-'*80) - print(e) - raise e + exec(_def_str) def aggregate(self, func_or_funcs, *args, **kwargs): """ @@ -3144,13 +3112,7 @@ class DataFrameGroupBy(NDFrameGroupBy): # # Make class defs of attributes on DataFrameGroupBy whitelist. for _def_str in _whitelist_method_generator(DataFrame,_apply_whitelist) : - try : - exec(_def_str) - except SyntaxError as e : - print('-'*80) - print(_def_str) - print('-'*80) - raise e + exec(_def_str) _block_agg_axis = 1 diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 288ec164198e4..c74c35fd07f5e 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -258,3 +258,29 @@ def knownfailer(*args, **kwargs): return nose.tools.make_decorator(f)(knownfailer) return knownfail_decorator + +def make_signature(func) : + """ + Returns the arg list of a func as (declarations with any defaults, bare names) + + Examples + -------- + + >>> def f(a,b,c=2) : + ... return a*b*c + >>> print(make_signature(f)) + (['a', 'b', 'c=2'], ['a', 'b', 'c']) + """ + from inspect import getargspec + spec = getargspec(func) + if spec.defaults == None : + n_wo_defaults = len(spec.args) + defaults = ('',) * n_wo_defaults + else : + n_wo_defaults = len(spec.args) - len(spec.defaults) + defaults = ('',) * n_wo_defaults + spec.defaults + args = [] + for i, (var, default) in enumerate(zip(spec.args, defaults)) 
: + args.append(var if default=='' else var+'='+repr(default)) + return args, spec.args +