
BUG fixes tuple agg issue 18079 #18354


Merged: 7 commits merged on Nov 26, 2017
Changes from 4 commits
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.21.1.txt
@@ -147,4 +147,4 @@ Other
^^^^^

-
Contributor: can you revert this file

Contributor (author): Hi Jeff, I'm afraid my lack of git experience is showing, as this file has been a bit of a thorn in my side. I'm concerned that my attempts to fix my mess will just waste more of your time. Would you mind sharing the syntax needed to revert (or reset?) this file to the proper commit?

Contributor: sure, I'll fix this up
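
(For reference on the question above: a single file can usually be restored to the upstream version with something like `git checkout upstream/master -- doc/source/whatsnew/v0.21.1.txt` and then committed; the remote name `upstream` is an assumption about the local setup, not something stated in this thread.)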

-
-
154 changes: 154 additions & 0 deletions doc/source/whatsnew/v0.21.1.txt.orig
@@ -0,0 +1,154 @@
.. _whatsnew_0211:

v0.21.1
-------

This is a minor release from 0.21.1 and includes a number of deprecations, new
features, enhancements, and performance improvements along with a large number
of bug fixes. We recommend that all users upgrade to this version.

.. _whatsnew_0211.enhancements:

New features
~~~~~~~~~~~~

-
-
-

.. _whatsnew_0211.enhancements.other:

Other Enhancements
^^^^^^^^^^^^^^^^^^

- :meth:`Timestamp.timestamp` is now available in Python 2.7. (:issue:`17329`)
-
-

.. _whatsnew_0211.deprecations:

Deprecations
~~~~~~~~~~~~

-
-
-

.. _whatsnew_0211.performance:

Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~

- Improved performance of plotting large series/dataframes (:issue:`18236`).
-
-

.. _whatsnew_0211.docs:

Documentation Changes
~~~~~~~~~~~~~~~~~~~~~

-
-
-

.. _whatsnew_0211.bug_fixes:

Bug Fixes
Contributor: remove this

~~~~~~~~~

Conversion
^^^^^^^^^^

- Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`)
- Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`)
- Bug in :meth:`IntervalIndex.copy` when copying an ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`)
- Bug in :func:`DataFrame.to_dict` where columns of datetime that are tz-aware were not converted to required arrays when used with ``orient='records'``, raising ``TypeError`` (:issue:`18372`)
-
-

Indexing
^^^^^^^^

- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`)
- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`)
- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`)
- Bug in ``Index.putmask`` when an invalid mask passed (:issue:`18368`)
-

I/O
^^^

- Bug in :class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting addressed (:issue:`17990`). Previously columns with display formatting were normally left as ordinal numbers and not converted to datetime objects.
- Bug in :func:`read_csv` when reading a compressed UTF-16 encoded file (:issue:`18071`)
- Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`)
- Bug in :func:`read_csv` when reading numeric category fields with high cardinality (:issue:`18186`)
- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`)
- :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`)
- :func:`read_parquet` now allows to specify kwargs which are passed to the respective engine (:issue:`18216`)
- Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`).
- Bug in :meth:`DataFrame.to_msgpack` when serializing data of the numpy.bool_ datatype (:issue:`18390`)


Plotting
^^^^^^^^

-
-
-

Groupby/Resample/Rolling
^^^^^^^^^^^^^^^^^^^^^^^^

- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`)
- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequency is 12h or higher (:issue:`15549`)
- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`)
<<<<<<< HEAD
- Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`)
=======
- Bug when grouping by a single column and aggregating with a class like`list` or `tuple` (:issue:`18079`)
>>>>>>> added whatsnew
-
-

Sparse
^^^^^^

-
-
-

Reshaping
^^^^^^^^^

- Error message in ``pd.merge_asof()`` for key datatype mismatch now includes datatype of left and right key (:issue:`18068`)
- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`)
- Bug in ``DataFrame.filter(...)`` when :class:`unicode` is passed as a condition in Python 2 (:issue:`13101`)
-

Numeric
^^^^^^^

- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`)
-
-
-

Categorical
^^^^^^^^^^^

- Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`)
- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`)
- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`)

String
^^^^^^

- :meth:`Series.str.split()` will now propagate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`)

Other
^^^^^

-
-
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
@@ -168,7 +168,7 @@ Plotting
Groupby/Resample/Rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
- Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`)
-
-

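For context, a minimal reproduction of the entry above (issue 18079), using the same small frame as the tests added later in this PR; before this change, passing the bare class ``tuple`` or ``list`` as the aggregation function on a single-column groupby was routed to the multiple-functions path rather than being applied once per group. A sketch of the fixed behaviour, with the expected output taken from those tests:

import pandas as pd

df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3],
                   'B': [1, 1, 1, 4, 4, 4],
                   'C': [1, 1, 1, 3, 4, 4]})

# Passing the class itself: each group now collapses to one tuple.
result = df.groupby('A')['C'].aggregate(tuple)
# A
# 1    (1, 1, 1)
# 3    (3, 4, 4)
# Name: C, dtype: object

# A list of callables still means "multiple aggregations" and keeps
# producing one column per function.
result_multi = df.groupby('A')['C'].aggregate([tuple])
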
13 changes: 13 additions & 0 deletions grp.patch
@@ -0,0 +1,13 @@
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -3022,7 +3022,9 @@ class SeriesGroupBy(GroupBy):
if isinstance(func_or_funcs, compat.string_types):
return getattr(self, func_or_funcs)(*args, **kwargs)

- if hasattr(func_or_funcs, '__iter__'):
Contributor: remove this

+ if isinstance(func_or_funcs, collections.Iterable):
+ # Catch instances of lists / tuples
+ # but not the class list / tuple itself.
ret = self._aggregate_multiple_funcs(func_or_funcs,
(_level or 0) + 1)
else:
15 changes: 15 additions & 0 deletions grp_test.patch
@@ -0,0 +1,15 @@
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2725,3 +2725,12 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
expected = f(df.groupby(tups)[field])
for k, v in compat.iteritems(expected):
assert (result[k] == v)
Contributor: patch needs removal

+
+
+def test_tuple():
+ df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3],
+ 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]})
+
+ result = df.groupby(['A', 'B']).aggregate(tuple)
+ result2 = df.groupby('A').aggregate(tuple)
+ result2 = df.groupby('A').aggregate([tuple])
7 changes: 4 additions & 3 deletions pandas/core/groupby.py
@@ -2299,8 +2299,7 @@ def _aggregate_series_pure_python(self, obj, func):
for label, group in splitter:
res = func(group)
if result is None:
if (isinstance(res, (Series, Index, np.ndarray)) or
isinstance(res, list)):
if (isinstance(res, (Series, Index, np.ndarray))):
raise ValueError('Function does not reduce')
result = np.empty(ngroups, dtype='O')

@@ -3022,7 +3021,9 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
if isinstance(func_or_funcs, compat.string_types):
return getattr(self, func_or_funcs)(*args, **kwargs)

if hasattr(func_or_funcs, '__iter__'):
if isinstance(func_or_funcs, collections.Iterable):
# Catch instances of lists / tuples
# but not the class list / tuple itself.
ret = self._aggregate_multiple_funcs(func_or_funcs,
(_level or 0) + 1)
else:
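A side note on the two hunks above: the first stops treating a per-group ``list`` result as "Function does not reduce", and the second swaps the ``__iter__`` attribute check for an ``isinstance`` check so the bare classes ``list`` and ``tuple`` are no longer mistaken for a collection of aggregation functions. A minimal sketch of that distinction (not part of the PR; it uses the ``collections.abc`` spelling for current Python, whereas the diff uses the older ``collections.Iterable`` alias):

import collections.abc

# Old check: true even for the bare classes, because `__iter__`
# exists as an attribute of the class object itself.
assert hasattr(tuple, '__iter__')
assert hasattr([min, max], '__iter__')

# New check: true only for iterable instances, so the bare class
# `tuple` now falls through to the single-function path.
assert not isinstance(tuple, collections.abc.Iterable)
assert isinstance([min, max], collections.abc.Iterable)

# Strings are iterable too, which is why the string case is handled
# before this branch in `aggregate` (e.g. .agg('mean')).
assert isinstance('mean', collections.abc.Iterable)
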
33 changes: 33 additions & 0 deletions pandas/tests/groupby/test_aggregate.py
@@ -892,3 +892,36 @@ def test_sum_uint64_overflow(self):
expected.index.name = 0
result = df.groupby(0).sum()
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("structure, expected", [
(tuple, pd.DataFrame({'C': {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})),
(list, pd.DataFrame({'C': {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})),
(lambda x: tuple(x), pd.DataFrame({'C': {(1, 1): (1, 1, 1),
(3, 4): (3, 4, 4)}})),
(lambda x: list(x), pd.DataFrame({'C': {(1, 1): [1, 1, 1],
(3, 4): [3, 4, 4]}}))
])
def test_agg_structs_dataframe(self, structure, expected):
df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3],
'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]})

result = df.groupby(['A', 'B']).aggregate(structure)
expected.index.names = ['A', 'B']
assert_frame_equal(result, expected)

@pytest.mark.parametrize("structure, expected", [
(tuple, pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name='C')),
(list, pd.Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name='C')),
(lambda x: tuple(x), pd.Series([(1, 1, 1), (3, 4, 4)],
index=[1, 3], name='C')),
(lambda x: list(x), pd.Series([[1, 1, 1], [3, 4, 4]],
index=[1, 3], name='C'))
])
def test_agg_structs_series(self, structure, expected):
# Issue #18079
df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3],
'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]})

result = df.groupby('A')['C'].aggregate(structure)
expected.index.name = 'A'
assert_series_equal(result, expected)
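
The parametrised cases above also pin down the DataFrame path; a short usage sketch of what they assert, with the expected result copied from the test parameters (printed layout approximate):

import pandas as pd
from pandas.testing import assert_frame_equal

df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3],
                   'B': [1, 1, 1, 4, 4, 4],
                   'C': [1, 1, 1, 3, 4, 4]})

# DataFrameGroupBy with a two-column key: the 'C' values of each
# group collapse into a single list per row of the result.
result = df.groupby(['A', 'B']).aggregate(list)
#              C
# A B
# 1 1  [1, 1, 1]
# 3 4  [3, 4, 4]

# The lambda parametrisations check that the bare class and an
# equivalent callable now take the same path.
assert_frame_equal(result, df.groupby(['A', 'B']).aggregate(lambda x: list(x)))
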
44 changes: 44 additions & 0 deletions test_agg.py
@@ -0,0 +1,44 @@
import pandas as pd
import numpy as np

def f(x):
return list(x)

#df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 2, 4]})
#result = df.groupby('A').aggregate(f)
Contributor: remove this



#df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 2, 4]})
#result = df.groupby('A').aggregate(list)
#result = df.groupby('A').agg(list)

df = pd.DataFrame({'A' : [1, 1, 3], 'B' : [1, 1, 4], 'C' : [1, 3, 4]})
#result = df.groupby(['A', 'B']).aggregate(pd.Series)


#df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3],
# 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]})

#print ('series ')
result = df.groupby('A')['C'].aggregate(np.array)
#print (result)
#
result = df.groupby(['A', 'B']).aggregate(np.array)
#print (result)
#
# result = df.groupby('A')['C'].aggregate(list)
# print (result)

def f(x):
return np.array(x)

print ('array')
result = df.groupby(['A', 'B']).aggregate(f)
print (result)

# result = df.groupby('A')['C'].aggregate(tuple)
# expected = pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name='C')
# expected.index.name = 'A'