From 035a62d67585977a05ca391436ed87b86a720a1e Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Mon, 14 Apr 2014 16:50:28 -0400
Subject: [PATCH 1/9] ENH: Allow aggregate numeric operations on timedelta64.

---
 pandas/core/groupby.py       |  9 +++++++--
 pandas/tests/test_groupby.py | 13 +++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index cb5dedc887bca..efef5d94e4d52 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1084,7 +1084,8 @@ def _cython_agg_general(self, how, numeric_only=True):
         output = {}
         for name, obj in self._iterate_slices():
             is_numeric = is_numeric_dtype(obj.dtype)
-            if numeric_only and not is_numeric:
+            is_timdelta64 = is_timedelta64_dtype(obj.dtype)
+            if numeric_only and not (is_numeric or is_timdelta64):
                 continue
 
             try:
@@ -2567,8 +2568,12 @@ def _cython_agg_blocks(self, how, numeric_only=True):
             data = data.get_numeric_data(copy=False)
 
         for block in data.blocks:
-
             values = block._try_operate(block.values)
+            is_numeric = is_numeric_dtype(values.dtype)
+            is_timedelta64 = is_timedelta64_dtype(values.dtype)
+
+            if numeric_only and not (is_numeric or is_timedelta64):
+                continue
 
             if block.is_numeric:
                 values = _algos.ensure_float64(values)
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 4077f468d8b1f..5b84c4ba18b00 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -4365,6 +4365,19 @@ def test_index_label_overlaps_location(self):
         expected = ser.take([1, 3, 4])
         assert_series_equal(actual, expected)
 
+    def test_groupby_methods_on_timedelta64(self):
+        df = self.df.copy().iloc[:4]
+        df['E'] = pd.to_timedelta(['00:00:01', '00:00:02', '00:00:03', '00:00:04'])
+        # DataFrameGroupBy
+        actual = df.groupby('A').mean()['E']
+        expected = pd.to_timedelta(Series(['00:00:03', '00:00:02'], index=['bar', 'foo'], name='E'))
+        assert_series_equal(actual, expected)
+
+        ser = df['E']
+        # SeriesGroupBy
+        actual = ser.groupby(df['A']).mean()
+        assert_series_equal(actual, expected)
+
     def test_groupby_selection_with_methods(self):
         # some methods which require DatetimeIndex
         rng = pd.date_range('2014', periods=len(self.df))

From 5273f579ffb795164bd1ff6d0794b7b8a0d72571 Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Tue, 15 Apr 2014 08:21:02 -0400
Subject: [PATCH 2/9] TST: Add groupby apply tests for timedelta64 columns

---
 pandas/tests/test_groupby.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 5b84c4ba18b00..6738683e00c2e 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -7,7 +7,7 @@
 from datetime import datetime
 from numpy import nan
 
-from pandas import date_range,bdate_range, Timestamp
+from pandas import date_range,bdate_range, Timestamp, _np_version_under1p7
 from pandas.core.index import Index, MultiIndex, Int64Index
 from pandas.core.api import Categorical, DataFrame
 from pandas.core.groupby import (SpecificationError, DataError,
@@ -603,6 +603,26 @@ def f(grp):
         e.name = None
         assert_series_equal(result,e)
 
+        # ...and with timedeltas
+        if not _np_version_under1p7:
+            df1 = df.copy()
+            df1['D'] = pd.to_timedelta(['00:00:01', '00:00:02', '00:00:03',
+                                       '00:00:04', '00:00:05', '00:00:06', '00:00:07'])
+            result = df1.groupby('A').apply(f)[['D']]
+            e = df1.groupby('A').first()[['D']]
+            e.loc['Pony'] = np.nan
+            assert_frame_equal(result, e)
+
+            def f(grp):
+                if grp.name == 'Pony':
+                    return None
+                return grp.iloc[0].loc['D']
+            result = df1.groupby('A').apply(f)
+            e = df1.groupby('A').first()['D'].copy()
+            e.loc['Pony'] = np.nan
+            e.name = None
+            assert_series_equal(result,e)
+
     def test_agg_api(self):
 
         # GH 6337

From 00cbc23047404a39824d86a325a775f9be50ad2c Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Thu, 17 Apr 2014 09:12:50 -0400
Subject: [PATCH 3/9] WIP: Skip timedelta tests if np < 1.7

---
 pandas/tests/test_groupby.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 6738683e00c2e..a96afd57a06dc 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -41,6 +41,12 @@ def _skip_if_mpl_not_installed():
     except ImportError:
         raise nose.SkipTest("matplotlib not installed")
 
+
+def _skip_if_np_version_under1p7():
+    if _np_version_under1p7:
+        raise nose.SkipTest("numpy version 1.7 has throughly broken timedelta")
+
+
 def commonSetUp(self):
     self.dateRange = bdate_range('1/1/2005', periods=250)
     self.stringIndex = Index([rands(8).upper() for x in range(250)])
@@ -607,21 +613,24 @@ def f(grp):
         if not _np_version_under1p7:
             df1 = df.copy()
             df1['D'] = pd.to_timedelta(['00:00:01', '00:00:02', '00:00:03',
-                                       '00:00:04', '00:00:05', '00:00:06', '00:00:07'])
+                                        '00:00:04', '00:00:05', '00:00:06',
+                                        '00:00:07'])
             result = df1.groupby('A').apply(f)[['D']]
             e = df1.groupby('A').first()[['D']]
             e.loc['Pony'] = np.nan
+            print(type(result))
+            print(type(e))
             assert_frame_equal(result, e)
 
             def f(grp):
                 if grp.name == 'Pony':
                     return None
                 return grp.iloc[0].loc['D']
-            result = df1.groupby('A').apply(f)
+            result = df1.groupby('A').apply(f)['D']
             e = df1.groupby('A').first()['D'].copy()
             e.loc['Pony'] = np.nan
             e.name = None
-            assert_series_equal(result,e)
+            assert_series_equal(result, e)
 
     def test_agg_api(self):
 
@@ -4386,6 +4395,7 @@ def test_index_label_overlaps_location(self):
         assert_series_equal(actual, expected)
 
     def test_groupby_methods_on_timedelta64(self):
+        _skip_if_np_version_under1p7()
         df = self.df.copy().iloc[:4]
         df['E'] = pd.to_timedelta(['00:00:01', '00:00:02', '00:00:03', '00:00:04'])
         # DataFrameGroupBy

From 0974841d205b89bf69450d93c921fa282f06dc4b Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Sun, 22 Jun 2014 10:00:15 -0400
Subject: [PATCH 4/9] WIP: Aggregate datetime64/timedelta64 via int64 view (currently broken)

---
 pandas/core/groupby.py | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index efef5d94e4d52..d339888b30a68 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1083,13 +1083,24 @@ def _try_cast(self, result, obj):
     def _cython_agg_general(self, how, numeric_only=True):
         output = {}
         for name, obj in self._iterate_slices():
-            is_numeric = is_numeric_dtype(obj.dtype)
-            is_timdelta64 = is_timedelta64_dtype(obj.dtype)
-            if numeric_only and not (is_numeric or is_timdelta64):
+            if is_numeric_dtype(obj.dtype):
+                obj = com.ensure_float(obj)
+                is_numeric = True
+                out_dtype = 'f%d' % obj.dtype.itemsize
+            else:
+                is_numeric = issubclass(obj.dtype.type, (np.datetime64,
+                                                            np.timedelta64))
+                out_dtype = 'float64'
+                if is_numeric:
+                    values = obj.view('int64')
+                else:
+                    values = obj.astype(object)
+
+            if numeric_only and not is_numeric:
                 continue
 
             try:
-                result, names = self.grouper.aggregate(obj.values, how)
+                result, names = self.grouper.aggregate(values, how)
             except AssertionError as e:
                 raise GroupByError(str(e))
             output[name] = self._try_cast(result, obj)
@@ -2569,12 +2580,22 @@ def _cython_agg_blocks(self, how, numeric_only=True):
 
         for block in data.blocks:
             values = block._try_operate(block.values)
-            is_numeric = is_numeric_dtype(values.dtype)
-            is_timedelta64 = is_timedelta64_dtype(values.dtype)
 
-            if numeric_only and not (is_numeric or is_timedelta64):
+            if is_numeric_dtype(values.dtype):
+                values = com.ensure_float(values)
+                is_numeric = True
+            else:
+                is_numeric = issubclass(values.dtype.type, (np.datetime64,
+                                                            np.timedelta64))
+                if is_numeric:
+                    values = values.view('int64')
+                else:
+                    values = values.astype(object)
+
+            if numeric_only and not is_numeric:
                 continue
 
+            # TODO DAN
             if block.is_numeric:
                 values = _algos.ensure_float64(values)
 

From f5ff0618a477278c19e5c3741b18ba43feacbde2 Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Sun, 22 Jun 2014 22:32:33 -0400
Subject: [PATCH 5/9] WIP: Improved approach, 6 tests failing.

---
 pandas/core/groupby.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index d339888b30a68..36bf87c6aba8d 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1087,6 +1087,7 @@ def _cython_agg_general(self, how, numeric_only=True):
                 obj = com.ensure_float(obj)
                 is_numeric = True
                 out_dtype = 'f%d' % obj.dtype.itemsize
+                values = obj.values
             else:
                 is_numeric = issubclass(obj.dtype.type, (np.datetime64,
                                                             np.timedelta64))

From 95d67ead28820c5752607c9cc7ac35f0a0597636 Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Mon, 23 Jun 2014 22:07:36 -0400
Subject: [PATCH 6/9] FIX: Fix downcasting of float to timedelta.

---
 pandas/core/common.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index f8f5928ca7d51..171ce9462452f 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -1271,14 +1271,23 @@ def _possibly_downcast_to_dtype(result, dtype):
         dtype = np.dtype(dtype)
 
     try:
-
         # don't allow upcasts here (except if empty)
+        print dtype.kind, result.dtype.kind
         if dtype.kind == result.dtype.kind:
             if result.dtype.itemsize <= dtype.itemsize and np.prod(result.shape):
                 return result
 
         if issubclass(dtype.type, np.floating):
             return result.astype(dtype)
+
+        # a datetimelike
+        elif ((dtype.kind == 'M' and result.dtype.kind == 'i') or
+              dtype.kind == 'm'):
+            try:
+                result = result.astype(dtype)
+            except:
+                pass
+
         elif dtype == np.bool_ or issubclass(dtype.type, np.integer):
 
             # if we don't have any elements, just astype it
@@ -1309,13 +1318,6 @@ def _possibly_downcast_to_dtype(result, dtype):
                     if (new_result == result).all():
                         return new_result
 
-        # a datetimelike
-        elif dtype.kind in ['M','m'] and result.dtype.kind in ['i']:
-            try:
-                result = result.astype(dtype)
-            except:
-                pass
-
     except:
         pass
 

From c1e81a4a0f7fda0d362f1de910a11562689574a0 Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Mon, 23 Jun 2014 22:15:53 -0400
Subject: [PATCH 7/9] CLN: Removed unused variable. PEP8.

---
 pandas/core/groupby.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 36bf87c6aba8d..629b64d17bf77 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1090,8 +1090,7 @@ def _cython_agg_general(self, how, numeric_only=True):
                 values = obj.values
             else:
                 is_numeric = issubclass(obj.dtype.type, (np.datetime64,
-                                                            np.timedelta64))
-                out_dtype = 'float64'
+                                                         np.timedelta64))
                 if is_numeric:
                     values = obj.view('int64')
                 else:

From b47d3c8ef46668ecb7aee8e703a11f9adc66e49b Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Thu, 26 Jun 2014 15:35:04 -0400
Subject: [PATCH 8/9] WIP: Rebased after #7560

---
 pandas/core/groupby.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 629b64d17bf77..4b55b8cced559 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2581,20 +2581,6 @@ def _cython_agg_blocks(self, how, numeric_only=True):
         for block in data.blocks:
             values = block._try_operate(block.values)
 
-            if is_numeric_dtype(values.dtype):
-                values = com.ensure_float(values)
-                is_numeric = True
-            else:
-                is_numeric = issubclass(values.dtype.type, (np.datetime64,
-                                                            np.timedelta64))
-                if is_numeric:
-                    values = values.view('int64')
-                else:
-                    values = values.astype(object)
-
-            if numeric_only and not is_numeric:
-                continue
-
             # TODO DAN
             if block.is_numeric:
                 values = _algos.ensure_float64(values)

From a89ab8b4b4ca177d6e0a59a708a8a1985e046c9b Mon Sep 17 00:00:00 2001
From: danielballan <daniel.b.allan@gmail.com>
Date: Tue, 27 Jan 2015 12:40:32 -0500
Subject: [PATCH 9/9] MNT: Remove checks for numpy < 1.7.

---
 pandas/tests/test_groupby.py | 49 ++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index a96afd57a06dc..734287baaa50d 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -7,7 +7,7 @@
 from datetime import datetime
 from numpy import nan
 
-from pandas import date_range,bdate_range, Timestamp, _np_version_under1p7
+from pandas import date_range,bdate_range, Timestamp
 from pandas.core.index import Index, MultiIndex, Int64Index
 from pandas.core.api import Categorical, DataFrame
 from pandas.core.groupby import (SpecificationError, DataError,
@@ -42,11 +42,6 @@ def _skip_if_mpl_not_installed():
         raise nose.SkipTest("matplotlib not installed")
 
 
-def _skip_if_np_version_under1p7():
-    if _np_version_under1p7:
-        raise nose.SkipTest("numpy version 1.7 has throughly broken timedelta")
-
-
 def commonSetUp(self):
     self.dateRange = bdate_range('1/1/2005', periods=250)
     self.stringIndex = Index([rands(8).upper() for x in range(250)])
@@ -610,27 +605,26 @@ def f(grp):
         assert_series_equal(result,e)
 
         # ...and with timedeltas
-        if not _np_version_under1p7:
-            df1 = df.copy()
-            df1['D'] = pd.to_timedelta(['00:00:01', '00:00:02', '00:00:03',
-                                        '00:00:04', '00:00:05', '00:00:06',
-                                        '00:00:07'])
-            result = df1.groupby('A').apply(f)[['D']]
-            e = df1.groupby('A').first()[['D']]
-            e.loc['Pony'] = np.nan
-            print(type(result))
-            print(type(e))
-            assert_frame_equal(result, e)
-
-            def f(grp):
-                if grp.name == 'Pony':
-                    return None
-                return grp.iloc[0].loc['D']
-            result = df1.groupby('A').apply(f)['D']
-            e = df1.groupby('A').first()['D'].copy()
-            e.loc['Pony'] = np.nan
-            e.name = None
-            assert_series_equal(result, e)
+        df1 = df.copy()
+        df1['D'] = pd.to_timedelta(['00:00:01', '00:00:02', '00:00:03',
+                                    '00:00:04', '00:00:05', '00:00:06',
+                                    '00:00:07'])
+        result = df1.groupby('A').apply(f)[['D']]
+        e = df1.groupby('A').first()[['D']]
+        e.loc['Pony'] = np.nan
+        print(type(result))
+        print(type(e))
+        assert_frame_equal(result, e)
+
+        def f(grp):
+            if grp.name == 'Pony':
+                return None
+            return grp.iloc[0].loc['D']
+        result = df1.groupby('A').apply(f)['D']
+        e = df1.groupby('A').first()['D'].copy()
+        e.loc['Pony'] = np.nan
+        e.name = None
+        assert_series_equal(result, e)
 
     def test_agg_api(self):
 
@@ -4395,7 +4389,6 @@ def test_index_label_overlaps_location(self):
         assert_series_equal(actual, expected)
 
     def test_groupby_methods_on_timedelta64(self):
-        _skip_if_np_version_under1p7()
         df = self.df.copy().iloc[:4]
         df['E'] = pd.to_timedelta(['00:00:01', '00:00:02', '00:00:03', '00:00:04'])
         # DataFrameGroupBy