From bfeaf551cdcdd1f8af22cc27dbd6faedc97aee0d Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sat, 25 Nov 2017 12:56:18 -0600 Subject: [PATCH 1/6] fix list agg and new tests --- pandas/core/groupby.py | 7 +++--- pandas/tests/groupby/test_aggregate.py | 33 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ba180cc98cb08..69de7630ede2c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2299,8 +2299,7 @@ def _aggregate_series_pure_python(self, obj, func): for label, group in splitter: res = func(group) if result is None: - if (isinstance(res, (Series, Index, np.ndarray)) or - isinstance(res, list)): + if (isinstance(res, (Series, Index, np.ndarray))): raise ValueError('Function does not reduce') result = np.empty(ngroups, dtype='O') @@ -3022,7 +3021,9 @@ def aggregate(self, func_or_funcs, *args, **kwargs): if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) - if hasattr(func_or_funcs, '__iter__'): + if isinstance(func_or_funcs, collections.Iterable): + # Catch instances of lists / tuples + # but not the class list / tuple itself. ret = self._aggregate_multiple_funcs(func_or_funcs, (_level or 0) + 1) else: diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 913d3bcc09869..040d8ac218ceb 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -892,3 +892,36 @@ def test_sum_uint64_overflow(self): expected.index.name = 0 result = df.groupby(0).sum() tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("structure, expected", [ + (tuple, pd.DataFrame({'C': {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), + (list, pd.DataFrame({'C': {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), + (lambda x: tuple(x), pd.DataFrame({'C': {(1, 1): (1, 1, 1), + (3, 4): (3, 4, 4)}})), + (lambda x: list(x), pd.DataFrame({'C': {(1, 1): [1, 1, 1], + (3, 4): [3, 4, 4]}})) + ]) + def test_agg_structs_dataframe(self, structure, expected): + df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], + 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]}) + + result = df.groupby(['A', 'B']).aggregate(structure) + expected.index.names = ['A', 'B'] + assert_frame_equal(result, expected) + + @pytest.mark.parametrize("structure, expected", [ + (tuple, pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name='C')), + (list, pd.Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name='C')), + (lambda x: tuple(x), pd.Series([(1, 1, 1), (3, 4, 4)], + index=[1, 3], name='C')), + (lambda x: list(x), pd.Series([[1, 1, 1], [3, 4, 4]], + index=[1, 3], name='C')) + ]) + def test_agg_structs_series(self, structure, expected): + # Issue #18079 + df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], + 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]}) + + result = df.groupby('A')['C'].aggregate(structure) + expected.index.name = 'A' + assert_series_equal(result, expected) From 836ef4c02e3d44b42039c1c6eba2c68a6aa14dbf Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sat, 25 Nov 2017 13:03:40 -0600 Subject: [PATCH 2/6] added whatsnew --- doc/source/whatsnew/v0.21.1.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index f8274bda546f7..85a6ee726580a 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -103,7 +103,11 @@ Groupby/Resample/Rolling - Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`) - Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequecy is 12h or higher (:issue:`15549`) - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) +<<<<<<< HEAD - Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`) +======= +- Bug when grouping by a single column and aggregating with a class like`list` or `tuple` (:issue:`18079`) +>>>>>>> added whatsnew - - From 374c3702ebdeb6043dfc5f54a21ec3f78807d50e Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sat, 25 Nov 2017 17:47:11 -0600 Subject: [PATCH 3/6] rebase cleanup --- doc/source/whatsnew/v0.21.1.txt | 6 +----- doc/source/whatsnew/v0.22.0.txt | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 85a6ee726580a..6cc9ff28e513e 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -103,11 +103,7 @@ Groupby/Resample/Rolling - Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`) - Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequecy is 12h or higher (:issue:`15549`) - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) -<<<<<<< HEAD - Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`) -======= -- Bug when grouping by a single column and aggregating with a class like`list` or `tuple` (:issue:`18079`) ->>>>>>> added whatsnew - - @@ -151,4 +147,4 @@ Other ^^^^^ - -- +- \ No newline at end of file diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 4ae3d9be04aa7..84e36858417cd 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -168,7 +168,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`) - - From 6639d48801877fb85f8bbe7052bbb07a435cfdac Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sat, 25 Nov 2017 16:38:57 -0600 Subject: [PATCH 4/6] backticks and moved to .22 --- doc/source/whatsnew/v0.21.1.txt.orig | 154 +++++++++++++++++++++++++++ grp.patch | 13 +++ grp_test.patch | 15 +++ test_agg.py | 44 ++++++++ 4 files changed, 226 insertions(+) create mode 100644 doc/source/whatsnew/v0.21.1.txt.orig create mode 100644 grp.patch create mode 100644 grp_test.patch create mode 100644 test_agg.py diff --git a/doc/source/whatsnew/v0.21.1.txt.orig b/doc/source/whatsnew/v0.21.1.txt.orig new file mode 100644 index 0000000000000..85a6ee726580a --- /dev/null +++ b/doc/source/whatsnew/v0.21.1.txt.orig @@ -0,0 +1,154 @@ +.. _whatsnew_0211: + +v0.21.1 +------- + +This is a minor release from 0.21.1 and includes a number of deprecations, new +features, enhancements, and performance improvements along with a large number +of bug fixes. We recommend that all users upgrade to this version. + +.. _whatsnew_0211.enhancements: + +New features +~~~~~~~~~~~~ + +- +- +- + +.. _whatsnew_0211.enhancements.other: + +Other Enhancements +^^^^^^^^^^^^^^^^^^ + +- :meth:`Timestamp.timestamp` is now available in Python 2.7. (:issue:`17329`) +- +- + +.. _whatsnew_0211.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- +- +- + +.. _whatsnew_0211.performance: + +Performance Improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of plotting large series/dataframes (:issue:`18236`). +- +- + +.. _whatsnew_0211.docs: + +Documentation Changes +~~~~~~~~~~~~~~~~~~~~~ + +- +- +- + +.. _whatsnew_0211.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +Conversion +^^^^^^^^^^ + +- Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`) +- Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`) +- Bug in :meth:`IntervalIndex.copy` when copying and ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) +- Bug in :func:`DataFrame.to_dict` where columns of datetime that are tz-aware were not converted to required arrays when used with ``orient='records'``, raising``TypeError` (:issue:`18372`) +- +- + +Indexing +^^^^^^^^ + +- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`) +- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`) +- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`) +- Bug in ``Index.putmask`` when an invalid mask passed (:issue:`18368`) +- + +I/O +^^^ + +- Bug in class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting addressed (:issue:`17990`). Previously columns with display formatting were normally left as ordinal numbers and not converted to datetime objects. +- Bug in :func:`read_csv` when reading a compressed UTF-16 encoded file (:issue:`18071`) +- Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`) +- Bug in :func:`read_csv` when reading numeric category fields with high cardinality (:issue:`18186`) +- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`) +- :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`) +- :func:`read_parquet` now allows to specify kwargs which are passed to the respective engine (:issue:`18216`) +- Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`). +- Bug in :meth:`DataFrame.to_msgpack` when serializing data of the numpy.bool_ datatype (:issue:`18390`) + + +Plotting +^^^^^^^^ + +- +- +- + +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`) +- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequecy is 12h or higher (:issue:`15549`) +- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) +<<<<<<< HEAD +- Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`) +======= +- Bug when grouping by a single column and aggregating with a class like`list` or `tuple` (:issue:`18079`) +>>>>>>> added whatsnew +- +- + +Sparse +^^^^^^ + +- +- +- + +Reshaping +^^^^^^^^^ + +- Error message in ``pd.merge_asof()`` for key datatype mismatch now includes datatype of left and right key (:issue:`18068`) +- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`) +- Bug in ``DataFrame.filter(...)`` when :class:`unicode` is passed as a condition in Python 2 (:issue:`13101`) +- + +Numeric +^^^^^^^ + +- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) +- +- +- + +Categorical +^^^^^^^^^^^ + +- Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`) +- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`) +- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`) +- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`) + +String +^^^^^^ + +- :meth:`Series.str.split()` will now propogate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`) + +Other +^^^^^ + +- +- diff --git a/grp.patch b/grp.patch new file mode 100644 index 0000000000000..7f8b07ed47437 --- /dev/null +++ b/grp.patch @@ -0,0 +1,13 @@ +--- a/pandas/core/groupby.py ++++ b/pandas/core/groupby.py +@@ -3022,7 +3022,9 @@ class SeriesGroupBy(GroupBy): + if isinstance(func_or_funcs, compat.string_types): + return getattr(self, func_or_funcs)(*args, **kwargs) + +- if hasattr(func_or_funcs, '__iter__'): ++ if isinstance(func_or_funcs, collections.Iterable): ++ # Catch instances of lists / tuples ++ # but not the class list / tuple itself. + ret = self._aggregate_multiple_funcs(func_or_funcs, + (_level or 0) + 1) + else: diff --git a/grp_test.patch b/grp_test.patch new file mode 100644 index 0000000000000..275bb8419a071 --- /dev/null +++ b/grp_test.patch @@ -0,0 +1,15 @@ +--- a/pandas/tests/groupby/test_groupby.py ++++ b/pandas/tests/groupby/test_groupby.py +@@ -2725,3 +2725,12 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): + expected = f(df.groupby(tups)[field]) + for k, v in compat.iteritems(expected): + assert (result[k] == v) ++ ++ ++def test_tuple(): ++ df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], ++ 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]}) ++ ++ result = df.groupby(['A', 'B']).aggregate(tuple) ++ result2 = df.groupby('A').aggregate(tuple) ++ result2 = df.groupby('A').aggregate([tuple]) diff --git a/test_agg.py b/test_agg.py new file mode 100644 index 0000000000000..85225e96b0294 --- /dev/null +++ b/test_agg.py @@ -0,0 +1,44 @@ +import pandas as pd +import numpy as np + +def f(x): + return list(x) + +#df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 2, 4]}) +#result = df.groupby('A').aggregate(f) + + +#df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 2, 4]}) +#result = df.groupby('A').aggregate(list) +#result = df.groupby('A').agg(list) + +df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 1, 4], 'C' : [1, 3, 4]}) +#result = df.groupby(['A', 'B']).aggregate(pd.Series) + + +#df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], + # 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]}) + +#print ('series ') +result = df.groupby('A')['C'].aggregate(np.array) +#print (result) +# +result = df.groupby(['A', 'B']).aggregate(np.array) +#print (result) +# +# result = df.groupby('A')['C'].aggregate(list) +# print (result) + +def f(x): + return np.array(x) + +print ('array') +result = df.groupby(['A', 'B']).aggregate(f) +print (result) + +# result = df.groupby('A')['C'].aggregate(tuple) +# expected = pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name='C') +# expected.index.name = 'A' + + + From 1361f745bdc6e24ae4f9a0f8125992b9858e62cd Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sat, 25 Nov 2017 18:12:49 -0600 Subject: [PATCH 5/6] removed local files --- doc/source/whatsnew/v0.21.1.txt.orig | 154 --------------------------- grp.patch | 13 --- test_agg.py | 44 -------- 3 files changed, 211 deletions(-) delete mode 100644 doc/source/whatsnew/v0.21.1.txt.orig delete mode 100644 grp.patch delete mode 100644 test_agg.py diff --git a/doc/source/whatsnew/v0.21.1.txt.orig b/doc/source/whatsnew/v0.21.1.txt.orig deleted file mode 100644 index 85a6ee726580a..0000000000000 --- a/doc/source/whatsnew/v0.21.1.txt.orig +++ /dev/null @@ -1,154 +0,0 @@ -.. _whatsnew_0211: - -v0.21.1 -------- - -This is a minor release from 0.21.1 and includes a number of deprecations, new -features, enhancements, and performance improvements along with a large number -of bug fixes. We recommend that all users upgrade to this version. - -.. _whatsnew_0211.enhancements: - -New features -~~~~~~~~~~~~ - -- -- -- - -.. _whatsnew_0211.enhancements.other: - -Other Enhancements -^^^^^^^^^^^^^^^^^^ - -- :meth:`Timestamp.timestamp` is now available in Python 2.7. (:issue:`17329`) -- -- - -.. _whatsnew_0211.deprecations: - -Deprecations -~~~~~~~~~~~~ - -- -- -- - -.. _whatsnew_0211.performance: - -Performance Improvements -~~~~~~~~~~~~~~~~~~~~~~~~ - -- Improved performance of plotting large series/dataframes (:issue:`18236`). -- -- - -.. _whatsnew_0211.docs: - -Documentation Changes -~~~~~~~~~~~~~~~~~~~~~ - -- -- -- - -.. _whatsnew_0211.bug_fixes: - -Bug Fixes -~~~~~~~~~ - -Conversion -^^^^^^^^^^ - -- Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`) -- Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`) -- Bug in :meth:`IntervalIndex.copy` when copying and ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) -- Bug in :func:`DataFrame.to_dict` where columns of datetime that are tz-aware were not converted to required arrays when used with ``orient='records'``, raising``TypeError` (:issue:`18372`) -- -- - -Indexing -^^^^^^^^ - -- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`) -- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`) -- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`) -- Bug in ``Index.putmask`` when an invalid mask passed (:issue:`18368`) -- - -I/O -^^^ - -- Bug in class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting addressed (:issue:`17990`). Previously columns with display formatting were normally left as ordinal numbers and not converted to datetime objects. -- Bug in :func:`read_csv` when reading a compressed UTF-16 encoded file (:issue:`18071`) -- Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`) -- Bug in :func:`read_csv` when reading numeric category fields with high cardinality (:issue:`18186`) -- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`) -- :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`) -- :func:`read_parquet` now allows to specify kwargs which are passed to the respective engine (:issue:`18216`) -- Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`). -- Bug in :meth:`DataFrame.to_msgpack` when serializing data of the numpy.bool_ datatype (:issue:`18390`) - - -Plotting -^^^^^^^^ - -- -- -- - -Groupby/Resample/Rolling -^^^^^^^^^^^^^^^^^^^^^^^^ - -- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`) -- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequecy is 12h or higher (:issue:`15549`) -- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) -<<<<<<< HEAD -- Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`) -======= -- Bug when grouping by a single column and aggregating with a class like`list` or `tuple` (:issue:`18079`) ->>>>>>> added whatsnew -- -- - -Sparse -^^^^^^ - -- -- -- - -Reshaping -^^^^^^^^^ - -- Error message in ``pd.merge_asof()`` for key datatype mismatch now includes datatype of left and right key (:issue:`18068`) -- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`) -- Bug in ``DataFrame.filter(...)`` when :class:`unicode` is passed as a condition in Python 2 (:issue:`13101`) -- - -Numeric -^^^^^^^ - -- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) -- -- -- - -Categorical -^^^^^^^^^^^ - -- Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`) -- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`) -- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`) -- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`) - -String -^^^^^^ - -- :meth:`Series.str.split()` will now propogate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`) - -Other -^^^^^ - -- -- diff --git a/grp.patch b/grp.patch deleted file mode 100644 index 7f8b07ed47437..0000000000000 --- a/grp.patch +++ /dev/null @@ -1,13 +0,0 @@ ---- a/pandas/core/groupby.py -+++ b/pandas/core/groupby.py -@@ -3022,7 +3022,9 @@ class SeriesGroupBy(GroupBy): - if isinstance(func_or_funcs, compat.string_types): - return getattr(self, func_or_funcs)(*args, **kwargs) - -- if hasattr(func_or_funcs, '__iter__'): -+ if isinstance(func_or_funcs, collections.Iterable): -+ # Catch instances of lists / tuples -+ # but not the class list / tuple itself. - ret = self._aggregate_multiple_funcs(func_or_funcs, - (_level or 0) + 1) - else: diff --git a/test_agg.py b/test_agg.py deleted file mode 100644 index 85225e96b0294..0000000000000 --- a/test_agg.py +++ /dev/null @@ -1,44 +0,0 @@ -import pandas as pd -import numpy as np - -def f(x): - return list(x) - -#df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 2, 4]}) -#result = df.groupby('A').aggregate(f) - - -#df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 2, 4]}) -#result = df.groupby('A').aggregate(list) -#result = df.groupby('A').agg(list) - -df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 1, 4], 'C' : [1, 3, 4]}) -#result = df.groupby(['A', 'B']).aggregate(pd.Series) - - -#df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], - # 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]}) - -#print ('series ') -result = df.groupby('A')['C'].aggregate(np.array) -#print (result) -# -result = df.groupby(['A', 'B']).aggregate(np.array) -#print (result) -# -# result = df.groupby('A')['C'].aggregate(list) -# print (result) - -def f(x): - return np.array(x) - -print ('array') -result = df.groupby(['A', 'B']).aggregate(f) -print (result) - -# result = df.groupby('A')['C'].aggregate(tuple) -# expected = pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name='C') -# expected.index.name = 'A' - - - From 7a4342fc176b893398e8bb00886af5d099a55ab6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Nov 2017 11:17:19 -0500 Subject: [PATCH 6/6] fixups --- doc/source/whatsnew/v0.21.1.txt | 2 +- grp_test.patch | 15 --------------- pandas/tests/groupby/test_aggregate.py | 2 +- 3 files changed, 2 insertions(+), 17 deletions(-) delete mode 100644 grp_test.patch diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 71cdcfa98cc8d..4c6cdb9846305 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -147,4 +147,4 @@ Other ^^^^^ - -- \ No newline at end of file +- diff --git a/grp_test.patch b/grp_test.patch deleted file mode 100644 index 275bb8419a071..0000000000000 --- a/grp_test.patch +++ /dev/null @@ -1,15 +0,0 @@ ---- a/pandas/tests/groupby/test_groupby.py -+++ b/pandas/tests/groupby/test_groupby.py -@@ -2725,3 +2725,12 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - expected = f(df.groupby(tups)[field]) - for k, v in compat.iteritems(expected): - assert (result[k] == v) -+ -+ -+def test_tuple(): -+ df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], -+ 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]}) -+ -+ result = df.groupby(['A', 'B']).aggregate(tuple) -+ result2 = df.groupby('A').aggregate(tuple) -+ result2 = df.groupby('A').aggregate([tuple]) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 040d8ac218ceb..3d27df31cee6e 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -637,7 +637,7 @@ def test_agg_consistency(self): def P1(a): try: return np.percentile(a.dropna(), q=1) - except: + except Exception: return np.nan import datetime as dt