From 82d19dd50755b66c6e7cefd57b0a693e61619a4d Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sat, 24 Sep 2016 09:07:47 -0400
Subject: [PATCH] PERF: faster grouping

remove pandas.core.groupby._groupby_indices to use algos.groupsort_indexer
add Categorical._reverse_indexer to facilitate this

closes #14293
---
 asv_bench/benchmarks/gil.py           | 173 ++++++++-----------------
 asv_bench/benchmarks/groupby.py       |  26 ++++
 doc/source/whatsnew/v0.19.0.txt       |   2 +
 pandas/algos.pyx                      | 132 ++++---------------
 pandas/core/categorical.py            |  40 ++++++
 pandas/core/groupby.py                |  26 +---
 pandas/indexes/base.py                |  23 +++-
 pandas/indexes/numeric.py             |   2 -
 pandas/src/algos_common_helper.pxi    | 175 --------------------------
 pandas/src/algos_common_helper.pxi.in |  30 -----
 pandas/tests/indexes/test_base.py     |   2 +-
 pandas/tests/indexes/test_numeric.py  |  13 +-
 pandas/tests/test_groupby.py          |  13 +-
 pandas/tests/types/test_inference.py  |  15 ++-
 pandas/tseries/base.py                |   4 -
 pandas/types/common.py                |   7 ++
 16 files changed, 201 insertions(+), 482 deletions(-)

diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py
index 2eb6786356511..1c82560c7e630 100644
--- a/asv_bench/benchmarks/gil.py
+++ b/asv_bench/benchmarks/gil.py
@@ -22,7 +22,7 @@ def wrapper(fname):
     return wrapper
 
 
-class nogil_groupby_count_2(object):
+class nogil_groupby_base(object):
     goal_time = 0.2
 
     def setup(self):
@@ -33,6 +33,9 @@ def setup(self):
         if (not have_real_test_parallel):
             raise NotImplementedError
 
+
+class nogil_groupby_count_2(nogil_groupby_base):
+
     def time_nogil_groupby_count_2(self):
         self.pg2()
 
@@ -41,16 +44,7 @@ def pg2(self):
         self.df.groupby('key')['data'].count()
 
 
-class nogil_groupby_last_2(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
-        np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
-        if (not have_real_test_parallel):
-            raise NotImplementedError
+class nogil_groupby_last_2(nogil_groupby_base):
 
     def time_nogil_groupby_last_2(self):
         self.pg2()
@@ -60,16 +54,7 @@ def pg2(self):
         self.df.groupby('key')['data'].last()
 
 
-class nogil_groupby_max_2(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
-        np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
-        if (not have_real_test_parallel):
-            raise NotImplementedError
+class nogil_groupby_max_2(nogil_groupby_base):
 
     def time_nogil_groupby_max_2(self):
         self.pg2()
@@ -79,16 +64,7 @@ def pg2(self):
         self.df.groupby('key')['data'].max()
 
 
-class nogil_groupby_mean_2(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
-        np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
-        if (not have_real_test_parallel):
-            raise NotImplementedError
+class nogil_groupby_mean_2(nogil_groupby_base):
 
     def time_nogil_groupby_mean_2(self):
         self.pg2()
@@ -98,16 +74,7 @@ def pg2(self):
         self.df.groupby('key')['data'].mean()
 
 
-class nogil_groupby_min_2(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
-        np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
-        if (not have_real_test_parallel):
-            raise NotImplementedError
+class nogil_groupby_min_2(nogil_groupby_base):
 
     def time_nogil_groupby_min_2(self):
         self.pg2()
 
@@ -117,16 +84,7 @@ def pg2(self):
         self.df.groupby('key')['data'].min()
 
 
-class nogil_groupby_prod_2(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
-        np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
-        if (not have_real_test_parallel):
-            raise NotImplementedError
+class nogil_groupby_prod_2(nogil_groupby_base):
 
     def time_nogil_groupby_prod_2(self):
         self.pg2()
@@ -136,16 +94,7 @@ def pg2(self):
         self.df.groupby('key')['data'].prod()
 
 
-class nogil_groupby_sum_2(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
-        np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
-        if (not have_real_test_parallel):
-            raise NotImplementedError
+class nogil_groupby_sum_2(nogil_groupby_base):
 
     def time_nogil_groupby_sum_2(self):
         self.pg2()
@@ -155,16 +104,7 @@ def pg2(self):
         self.df.groupby('key')['data'].sum()
 
 
-class nogil_groupby_sum_4(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
-        np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
-        if (not have_real_test_parallel):
-            raise NotImplementedError
+class nogil_groupby_sum_4(nogil_groupby_base):
 
     def time_nogil_groupby_sum_4(self):
         self.pg4()
@@ -172,41 +112,16 @@ def time_nogil_groupby_sum_4(self):
     def f(self):
         self.df.groupby('key')['data'].sum()
 
-    def g2(self):
-        for i in range(2):
-            self.f()
-
     def g4(self):
         for i in range(4):
             self.f()
 
-    def g8(self):
-        for i in range(8):
-            self.f()
-
-    @test_parallel(num_threads=2)
-    def pg2(self):
-        self.f()
-
     @test_parallel(num_threads=4)
     def pg4(self):
         self.f()
 
-    @test_parallel(num_threads=8)
-    def pg8(self):
-        self.f()
-
 
-class nogil_groupby_sum_8(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
-        np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
-        if (not have_real_test_parallel):
-            raise NotImplementedError
+class nogil_groupby_sum_8(nogil_groupby_base):
 
     def time_nogil_groupby_sum_8(self):
         self.pg8()
@@ -214,48 +129,68 @@ def time_nogil_groupby_sum_8(self):
     def f(self):
         self.df.groupby('key')['data'].sum()
 
-    def g2(self):
-        for i in range(2):
-            self.f()
-
-    def g4(self):
-        for i in range(4):
-            self.f()
-
     def g8(self):
         for i in range(8):
             self.f()
 
-    @test_parallel(num_threads=2)
-    def pg2(self):
-        self.f()
-
-    @test_parallel(num_threads=4)
-    def pg4(self):
-        self.f()
-
     @test_parallel(num_threads=8)
     def pg8(self):
         self.f()
 
 
-class nogil_groupby_var_2(object):
+class nogil_groupby_var_2(nogil_groupby_base):
+
+    def time_nogil_groupby_var_2(self):
+        self.pg2()
+
+    @test_parallel(num_threads=2)
+    def pg2(self):
+        self.df.groupby('key')['data'].var()
+
+
+class nogil_groupby_groups(object):
     goal_time = 0.2
 
     def setup(self):
-        self.N = 1000000
-        self.ngroups = 1000
         np.random.seed(1234)
-        self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
+        self.size = 2**22
+        self.ngroups = 100
+        self.data = Series(np.random.randint(0, self.ngroups, size=self.size))
         if (not have_real_test_parallel):
             raise NotImplementedError
 
-    def time_nogil_groupby_var_2(self):
+    def f(self):
+        self.data.groupby(self.data).groups
+
+
+class nogil_groupby_groups_2(nogil_groupby_groups):
+
+    def time_nogil_groupby_groups(self):
         self.pg2()
 
     @test_parallel(num_threads=2)
     def pg2(self):
-        self.df.groupby('key')['data'].var()
+        self.f()
+
+
+class nogil_groupby_groups_4(nogil_groupby_groups):
+
+    def time_nogil_groupby_groups(self):
+        self.pg4()
+
+    @test_parallel(num_threads=4)
+    def pg4(self):
+        self.f()
+
+
+class nogil_groupby_groups_8(nogil_groupby_groups):
+
+    def time_nogil_groupby_groups(self):
+        self.pg8()
+
+    @test_parallel(num_threads=8)
+    def pg8(self):
+        self.f()
 
 
 class nogil_take1d_float64(object):
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 0611a3564ff7a..e12b00dd06b39 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -32,6 +32,32 @@ def time_groupby_apply_dict_return(self):
         self.data.groupby(self.labels).apply(self.f)
 
 
+#----------------------------------------------------------------------
+# groups
+
+class groupby_groups(object):
+    goal_time = 0.1
+
+    def setup(self):
+        size = 2**22
+        self.data = Series(np.random.randint(0, 100, size=size))
+        self.data2 = Series(np.random.randint(0, 10000, size=size))
+        self.data3 = Series(tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size)))
+        self.data4 = Series(tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size)))
+
+    def time_groupby_groups_int64_small(self):
+        self.data.groupby(self.data).groups
+
+    def time_groupby_groups_int64_large(self):
+        self.data2.groupby(self.data2).groups
+
+    def time_groupby_groups_object_small(self):
+        self.data3.groupby(self.data3).groups
+
+    def time_groupby_groups_object_large(self):
+        self.data4.groupby(self.data4).groups
+
+
 #----------------------------------------------------------------------
 # First / last functions
 
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 67beb468dce8a..355d12e113398 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -1335,6 +1335,7 @@ Other API Changes
 
 - ``Series`` and ``Index`` now support ``divmod`` which will return a tuple of
   series or indices. This behaves like a standard binary operator with regards
   to broadcasting rules (:issue:`14208`).
+- ``.groupby.groups`` will now return a dictionary of ``Index`` objects, rather than a dictionary of ``np.ndarray`` or ``lists`` (:issue:`14293`)
 
 .. _whatsnew_0190.deprecations:
 
@@ -1407,6 +1408,7 @@ Performance Improvements
 - Improved performance of hashing ``Period`` (:issue:`12817`)
 - Improved performance of ``factorize`` of datetime with timezone (:issue:`13750`)
 - Improved performance by lazily creating indexing hashtables on larger Indexes (:issue:`14266`)
+- Improved performance of ``groupby.groups`` (:issue:`14293`)
 
 .. _whatsnew_0190.bug_fixes:
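Note: to make the ``.groups`` API change above concrete, here is a minimal
sketch (a tiny hand-built Series; the ``Out`` reprs are from a Python 2
build and may differ slightly on Python 3):

    In [1]: s = pd.Series([10, 20, 30], index=['x', 'y', 'z'])

    In [2]: s.groupby(['a', 'b', 'a']).groups
    Out[2]: {'a': Index([u'x', u'z'], dtype='object'),
             'b': Index([u'y'], dtype='object')}

Previously the values of this dict were plain lists of labels, e.g.
``{'a': ['x', 'z'], 'b': ['y']}``.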
diff --git a/pandas/algos.pyx b/pandas/algos.pyx
index 8710ef34504d1..04f3ac70bdf5c 100644
--- a/pandas/algos.pyx
+++ b/pandas/algos.pyx
@@ -989,129 +989,47 @@ def is_lexsorted(list list_of_arrays):
 
 
 @cython.boundscheck(False)
-def groupby_indices(dict ids, ndarray[int64_t] labels,
-                    ndarray[int64_t] counts):
-    """
-    turn group_labels output into a combined indexer mapping the labels to
-    indexers
-
-    Parameters
-    ----------
-    ids: dict
-        mapping of label -> group indexer
-    labels: ndarray
-        labels for positions
-    counts: ndarray
-        group counts
-
-    Returns
-    -------
-    list of ndarrays of indices
-
-    """
-    cdef:
-        Py_ssize_t i, n = len(labels)
-        ndarray[int64_t] arr, seen
-        int64_t loc
-        int64_t k
-        dict result = {}
-
-    seen = np.zeros_like(counts)
-
-    cdef int64_t **vecs = <int64_t **> malloc(len(ids) * sizeof(int64_t*))
-    for i from 0 <= i < len(counts):
-        arr = np.empty(counts[i], dtype=np.int64)
-        result[ids[i]] = arr
-        vecs[i] = <int64_t *> arr.data
-
-    for i from 0 <= i < n:
-        k = labels[i]
-
-        # was NaN
-        if k == -1:
-            continue
-
-        loc = seen[k]
-        vecs[k][loc] = i
-        seen[k] = loc + 1
-
-    free(vecs)
-    return result
-
-
 @cython.wraparound(False)
-@cython.boundscheck(False)
-def group_labels(ndarray[object] values):
+def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
     """
-    Compute label vector from input values and associated useful data
+    compute a 1-d indexer that is an ordering of the passed index,
+    ordered by the groups. This is a reverse of the label
+    factorization process.
 
     Parameters
     ----------
-    values: object ndarray
+    index: int64 ndarray
+        mappings from group -> position
+    ngroups: int64
+        number of groups
 
-    Returns
-    -------
-    tuple of (reverse mappings of label -> group indexer,
-              factorized labels ndarray,
-              group counts ndarray)
+    return a tuple of (1-d indexer ordered by groups, group counts)
     """
-    cdef:
-        Py_ssize_t i, n = len(values)
-        ndarray[int64_t] labels = np.empty(n, dtype=np.int64)
-        ndarray[int64_t] counts = np.empty(n, dtype=np.int64)
-        dict ids = {}, reverse = {}
-        int64_t idx
-        object val
-        int64_t count = 0
-
-    for i from 0 <= i < n:
-        val = values[i]
-
-        # is NaN
-        if val != val:
-            labels[i] = -1
-            continue
-
-        # for large number of groups, not doing try: except: makes a big
-        # difference
-        if val in ids:
-            idx = ids[val]
-            labels[i] = idx
-            counts[idx] = counts[idx] + 1
-        else:
-            ids[val] = count
-            reverse[count] = val
-            labels[i] = count
-            counts[count] = 1
-            count += 1
-
-    return reverse, labels, counts[:count].copy()
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
     cdef:
         Py_ssize_t i, loc, label, n
         ndarray[int64_t] counts, where, result
 
-    # count group sizes, location 0 for NA
     counts = np.zeros(ngroups + 1, dtype=np.int64)
     n = len(index)
-    for i from 0 <= i < n:
-        counts[index[i] + 1] += 1
-
-    # mark the start of each contiguous group of like-indexed data
+    result = np.zeros(n, dtype=np.int64)
     where = np.zeros(ngroups + 1, dtype=np.int64)
-    for i from 1 <= i < ngroups + 1:
-        where[i] = where[i - 1] + counts[i - 1]
 
-    # this is our indexer
-    result = np.zeros(n, dtype=np.int64)
-    for i from 0 <= i < n:
-        label = index[i] + 1
-        result[where[label]] = i
-        where[label] += 1
+    with nogil:
+
+        # count group sizes, location 0 for NA
+        for i from 0 <= i < n:
+            counts[index[i] + 1] += 1
+
+        # mark the start of each contiguous group of like-indexed data
+        for i from 1 <= i < ngroups + 1:
+            where[i] = where[i - 1] + counts[i - 1]
+
+        # this is our indexer
+        for i from 0 <= i < n:
+            label = index[i] + 1
+            result[where[label]] = i
+            where[label] += 1
 
     return result, counts
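Note: the rewritten ``groupsort_indexer`` is a counting sort over the label
codes. A pure-NumPy sketch of the same idea (the name ``groupsort_indexer_py``
and its loop are illustrative only; the Cython version above is the real
implementation):

    import numpy as np

    def groupsort_indexer_py(labels, ngroups):
        # slot 0 counts the NA group (label -1); slot k + 1 counts group k
        counts = np.bincount(labels + 1, minlength=ngroups + 1)
        # starting offset of each group's block in the output
        where = np.zeros(ngroups + 1, dtype=np.int64)
        where[1:] = counts.cumsum()[:-1]
        # place each position into its group's block, preserving input order
        result = np.zeros(len(labels), dtype=np.int64)
        for i, label in enumerate(labels + 1):
            result[where[label]] = i
            where[label] += 1
        return result, counts

    labels = np.array([1, 0, 1, -1, 0], dtype=np.int64)
    indexer, counts = groupsort_indexer_py(labels, 2)
    # indexer -> [3, 1, 4, 0, 2]: the NA row first, then group 0, then group 1
    # counts  -> [1, 2, 2]

Because the loops only touch typed int64 buffers, the Cython version can also
release the GIL (the new ``with nogil:`` block), which is what the parallel
``nogil_groupby_groups_*`` benchmarks exercise.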
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 6b37a5e2cd202..db48f2a46eaf3 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -6,6 +6,7 @@
 
 from pandas import compat, lib
 from pandas.compat import u, lzip
+import pandas.algos as _algos
 
 from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex
 from pandas.types.missing import isnull, notnull
@@ -1699,6 +1700,45 @@ def __setitem__(self, key, value):
         lindexer = self._maybe_coerce_indexer(lindexer)
         self._codes[key] = lindexer
 
+    def _reverse_indexer(self):
+        """
+        Compute the inverse of a categorical, returning
+        a dict of categories -> indexers.
+
+        *This is an internal function*
+
+        Returns
+        -------
+        dict of categories -> indexers
+
+        Examples
+        --------
+        In [1]: c = pd.Categorical(list('aabca'))
+
+        In [2]: c
+        Out[2]:
+        [a, a, b, c, a]
+        Categories (3, object): [a, b, c]
+
+        In [3]: c.categories
+        Out[3]: Index([u'a', u'b', u'c'], dtype='object')
+
+        In [4]: c.codes
+        Out[4]: array([0, 0, 1, 2, 0], dtype=int8)
+
+        In [5]: c._reverse_indexer()
+        Out[5]: {'a': array([0, 1, 4]), 'b': array([2]), 'c': array([3])}
+
+        """
+        categories = self.categories
+        r, counts = _algos.groupsort_indexer(self.codes.astype('int64'),
+                                             categories.size)
+        counts = counts.cumsum()
+        result = [r[counts[indexer]:counts[indexer + 1]]
+                  for indexer in range(len(counts) - 1)]
+        result = dict(zip(categories, result))
+        return result
+
     # reduction ops #
     def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                 filter_type=None, **kwds):
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 733fae0c34729..3c376e3188eac 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -28,6 +28,7 @@
     _ensure_platform_int,
     _ensure_int64,
     _ensure_object,
+    _ensure_categorical,
     _ensure_float)
 from pandas.types.cast import _possibly_downcast_to_dtype
 from pandas.types.missing import isnull, notnull, _maybe_fill
@@ -1657,7 +1658,7 @@ def groups(self):
         else:
             to_groupby = lzip(*(ping.grouper for ping in self.groupings))
             to_groupby = Index(to_groupby)
-            return self.axis.groupby(to_groupby.values)
+            return self.axis.groupby(to_groupby)
 
     @cache_readonly
     def is_monotonic(self):
@@ -2319,7 +2320,8 @@ def ngroups(self):
 
     @cache_readonly
     def indices(self):
-        return _groupby_indices(self.grouper)
+        values = _ensure_categorical(self.grouper)
+        return values._reverse_indexer()
 
     @property
     def labels(self):
@@ -2342,7 +2344,8 @@ def _make_labels(self):
 
     @cache_readonly
     def groups(self):
-        return self.index.groupby(self.grouper)
+        return self.index.groupby(Categorical.from_codes(self.labels,
+                                                         self.group_index))
 
 
 def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
@@ -4436,23 +4439,6 @@ def _reorder_by_uniques(uniques, labels):
     return uniques, labels
 
 
-def _groupby_indices(values):
-
-    if is_categorical_dtype(values):
-        # we have a categorical, so we can do quite a bit
-        # bit better than factorizing again
-        reverse = dict(enumerate(values.categories))
-        codes = values.codes.astype('int64')
-
-        mask = 0 <= codes
-        counts = np.bincount(codes[mask], minlength=values.categories.size)
-    else:
-        reverse, codes, counts = _algos.group_labels(
-            _values_from_object(_ensure_object(values)))
-
-    return _algos.groupby_indices(reverse, codes, counts)
-
-
 def numpy_groupby(data, labels, axis=0):
     s = np.argsort(labels)
     keys, inv = np.unique(labels, return_inverse=True)
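Note: ``_reverse_indexer`` works by ordering positions by category code and
then slicing that ordering at the cumulative group counts. The same steps in
plain NumPy (a sketch only -- a stable argsort stands in for the Cython
``groupsort_indexer``, whose counts reserve a leading slot for NA codes):

    import numpy as np
    import pandas as pd

    c = pd.Categorical(list('aabca'))
    codes = c.codes.astype('int64')                       # [0, 0, 1, 2, 0]

    order = np.argsort(codes, kind='mergesort')           # stable sort by code
    counts = np.bincount(codes + 1, minlength=len(c.categories) + 1)
    bounds = counts.cumsum()                              # block edges, NA block first

    result = {cat: order[bounds[i]:bounds[i + 1]]
              for i, cat in enumerate(c.categories)}
    # {'a': array([0, 1, 4]), 'b': array([2]), 'c': array([3])}

This is why ``Grouping.indices`` above can simply coerce the grouper to a
``Categorical`` and call ``_reverse_indexer``, replacing the old
``group_labels``/``groupby_indices`` pair.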
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index f430305f5cb91..5138ca5a6b21e 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -17,7 +17,9 @@
 from pandas.types.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex
 from pandas.types.missing import isnull, array_equivalent
-from pandas.types.common import (_ensure_int64, _ensure_object,
+from pandas.types.common import (_ensure_int64,
+                                 _ensure_object,
+                                 _ensure_categorical,
                                  _ensure_platform_int,
                                  is_integer,
                                  is_float,
@@ -111,7 +113,6 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
     _join_precedence = 1
 
     # Cython methods
-    _groupby = _algos.groupby_object
     _arrmap = _algos.arrmap_object
     _left_indexer_unique = _join.left_join_indexer_unique_object
     _left_indexer = _join.left_join_indexer_object
@@ -2352,13 +2353,13 @@ def _possibly_promote(self, other):
             return self.astype('object'), other.astype('object')
         return self, other
 
-    def groupby(self, to_groupby):
+    def groupby(self, values):
         """
         Group the index labels by a given array of values.
 
         Parameters
         ----------
-        to_groupby : array
+        values : array
             Values used to determine the groups.
 
         Returns
@@ -2366,7 +2367,19 @@ def groupby(self, to_groupby):
         -------
         groups : dict
             {group name -> group labels}
         """
-        return self._groupby(self.values, _values_from_object(to_groupby))
+
+        # TODO: if we are a MultiIndex, we can do better
+        # than converting to tuples
+        from .multi import MultiIndex
+        if isinstance(values, MultiIndex):
+            values = values.values
+        values = _ensure_categorical(values)
+        result = values._reverse_indexer()
+
+        # map to the label
+        result = {k: self.take(v) for k, v in compat.iteritems(result)}
+
+        return result
 
     def map(self, mapper):
         """
diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py
index b9625f3aaff92..97f7093e99064 100644
--- a/pandas/indexes/numeric.py
+++ b/pandas/indexes/numeric.py
@@ -113,7 +113,6 @@ class Int64Index(NumericIndex):
     """
 
     _typ = 'int64index'
-    _groupby = _algos.groupby_int64
     _arrmap = _algos.arrmap_int64
     _left_indexer_unique = _join.left_join_indexer_unique_int64
     _left_indexer = _join.left_join_indexer_int64
@@ -200,7 +199,6 @@ class Float64Index(NumericIndex):
     _typ = 'float64index'
     _engine_type = _index.Float64Engine
 
-    _groupby = _algos.groupby_float64
    _arrmap = _algos.arrmap_float64
     _left_indexer_unique = _join.left_join_indexer_unique_float64
     _left_indexer = _join.left_join_indexer_float64
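Note: with the per-dtype ``_groupby`` class attributes removed, every index
type now goes through the single categorical-based ``Index.groupby`` above. A
small sketch of the resulting behavior (reprs abbreviated, Python 2 build):

    In [1]: idx = pd.Index(['w', 'x', 'y', 'z'])

    In [2]: idx.groupby(np.array([1, 2, 1, 2]))
    Out[2]: {1: Index([u'w', u'y'], dtype='object'),
             2: Index([u'x', u'z'], dtype='object')}

The keys are the (categorized) grouping values, and each value is
``self.take`` of the positions in that group, hence an ``Index`` rather than
a list.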
diff --git a/pandas/src/algos_common_helper.pxi b/pandas/src/algos_common_helper.pxi
index be587fbc9a019..9dede87e0c15b 100644
--- a/pandas/src/algos_common_helper.pxi
+++ b/pandas/src/algos_common_helper.pxi
@@ -10,7 +10,6 @@ Template for each `dtype` helper function using 1-d template
 - backfill_1d
 - backfill_2d
 - is_monotonic
-- groupby
 - arrmap
 
 WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
@@ -391,35 +390,6 @@ def is_monotonic_float64(ndarray[float64_t] arr, bint timelike):
            is_unique and (is_monotonic_inc or is_monotonic_dec)
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def groupby_float64(ndarray[float64_t] index, ndarray labels):
-    cdef dict result = {}
-    cdef Py_ssize_t i, length
-    cdef list members
-    cdef object idx, key
-
-    length = len(index)
-
-    if not length == len(labels):
-        raise AssertionError("len(index) != len(labels)")
-
-    for i in range(length):
-        key = util.get_value_1d(labels, i)
-
-        if is_null_datetimelike(key):
-            continue
-
-        idx = index[i]
-        if key in result:
-            members = result[key]
-            members.append(idx)
-        else:
-            result[key] = [idx]
-
-    return result
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def arrmap_float64(ndarray[float64_t] index, object func):
@@ -806,35 +776,6 @@ def is_monotonic_float32(ndarray[float32_t] arr, bint timelike):
            is_unique and (is_monotonic_inc or is_monotonic_dec)
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def groupby_float32(ndarray[float32_t] index, ndarray labels):
-    cdef dict result = {}
-    cdef Py_ssize_t i, length
-    cdef list members
-    cdef object idx, key
-
-    length = len(index)
-
-    if not length == len(labels):
-        raise AssertionError("len(index) != len(labels)")
-
-    for i in range(length):
-        key = util.get_value_1d(labels, i)
-
-        if is_null_datetimelike(key):
-            continue
-
-        idx = index[i]
-        if key in result:
-            members = result[key]
-            members.append(idx)
-        else:
-            result[key] = [idx]
-
-    return result
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def arrmap_float32(ndarray[float32_t] index, object func):
@@ -1221,35 +1162,6 @@ def is_monotonic_object(ndarray[object] arr, bint timelike):
            is_unique and (is_monotonic_inc or is_monotonic_dec)
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def groupby_object(ndarray[object] index, ndarray labels):
-    cdef dict result = {}
-    cdef Py_ssize_t i, length
-    cdef list members
-    cdef object idx, key
-
-    length = len(index)
-
-    if not length == len(labels):
-        raise AssertionError("len(index) != len(labels)")
-
-    for i in range(length):
-        key = util.get_value_1d(labels, i)
-
-        if is_null_datetimelike(key):
-            continue
-
-        idx = index[i]
-        if key in result:
-            members = result[key]
-            members.append(idx)
-        else:
-            result[key] = [idx]
-
-    return result
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def arrmap_object(ndarray[object] index, object func):
@@ -1636,35 +1548,6 @@ def is_monotonic_int32(ndarray[int32_t] arr, bint timelike):
            is_unique and (is_monotonic_inc or is_monotonic_dec)
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def groupby_int32(ndarray[int32_t] index, ndarray labels):
-    cdef dict result = {}
-    cdef Py_ssize_t i, length
-    cdef list members
-    cdef object idx, key
-
-    length = len(index)
-
-    if not length == len(labels):
-        raise AssertionError("len(index) != len(labels)")
-
-    for i in range(length):
-        key = util.get_value_1d(labels, i)
-
-        if is_null_datetimelike(key):
-            continue
-
-        idx = index[i]
-        if key in result:
-            members = result[key]
-            members.append(idx)
-        else:
-            result[key] = [idx]
-
-    return result
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def arrmap_int32(ndarray[int32_t] index, object func):
@@ -2051,35 +1934,6 @@ def is_monotonic_int64(ndarray[int64_t] arr, bint timelike):
            is_unique and (is_monotonic_inc or is_monotonic_dec)
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def groupby_int64(ndarray[int64_t] index, ndarray labels):
-    cdef dict result = {}
-    cdef Py_ssize_t i, length
-    cdef list members
-    cdef object idx, key
-
-    length = len(index)
-
-    if not length == len(labels):
-        raise AssertionError("len(index) != len(labels)")
-
-    for i in range(length):
-        key = util.get_value_1d(labels, i)
-
-        if is_null_datetimelike(key):
-            continue
-
-        idx = index[i]
-        if key in result:
-            members = result[key]
-            members.append(idx)
-        else:
-            result[key] = [idx]
-
-    return result
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def arrmap_int64(ndarray[int64_t] index, object func):
@@ -2466,35 +2320,6 @@ def is_monotonic_bool(ndarray[uint8_t] arr, bint timelike):
            is_unique and (is_monotonic_inc or is_monotonic_dec)
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def groupby_bool(ndarray[uint8_t] index, ndarray labels):
-    cdef dict result = {}
-    cdef Py_ssize_t i, length
-    cdef list members
-    cdef object idx, key
-
-    length = len(index)
-
-    if not length == len(labels):
-        raise AssertionError("len(index) != len(labels)")
-
-    for i in range(length):
-        key = util.get_value_1d(labels, i)
-
-        if is_null_datetimelike(key):
-            continue
-
-        idx = index[i]
-        if key in result:
-            members = result[key]
-            members.append(idx)
-        else:
-            result[key] = [idx]
-
-    return result
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def arrmap_bool(ndarray[uint8_t] index, object func):
diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/src/algos_common_helper.pxi.in
index cec5712c0b7f4..c52c734f727e9 100644
--- a/pandas/src/algos_common_helper.pxi.in
+++ b/pandas/src/algos_common_helper.pxi.in
@@ -10,7 +10,6 @@ Template for each `dtype` helper function using 1-d template
 - backfill_1d
 - backfill_2d
 - is_monotonic
-- groupby
 - arrmap
 
 WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
@@ -413,35 +412,6 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
            is_unique and (is_monotonic_inc or is_monotonic_dec)
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def groupby_{{name}}(ndarray[{{c_type}}] index, ndarray labels):
-    cdef dict result = {}
-    cdef Py_ssize_t i, length
-    cdef list members
-    cdef object idx, key
-
-    length = len(index)
-
-    if not length == len(labels):
-        raise AssertionError("len(index) != len(labels)")
-
-    for i in range(length):
-        key = util.get_value_1d(labels, i)
-
-        if is_null_datetimelike(key):
-            continue
-
-        idx = index[i]
-        if key in result:
-            members = result[key]
-            members.append(idx)
-        else:
-            result[key] = [idx]
-
-    return result
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 7f68318d4d7d3..421174ded57d5 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1541,7 +1541,7 @@ def get_reindex_type(target):
     def test_groupby(self):
         idx = Index(range(5))
         groups = idx.groupby(np.array([1, 1, 2, 2, 2]))
-        exp = {1: [0, 1], 2: [2, 3, 4]}
+        exp = {1: pd.Index([0, 1]), 2: pd.Index([2, 3, 4])}
         tm.assert_dict_equal(groups, exp)
 
     def test_equals_op_multiindex(self):
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
index b04d7f128e133..b362c9716b672 100644
--- a/pandas/tests/indexes/test_numeric.py
+++ b/pandas/tests/indexes/test_numeric.py
@@ -144,8 +144,8 @@ def test_index_groupby(self):
         for idx in [int_idx, float_idx, obj_idx, dt_idx]:
             to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
-            self.assertEqual(idx.groupby(to_groupby),
-                             {1.0: [idx[0], idx[5]], 2.0: [idx[1], idx[4]]})
+            tm.assert_dict_equal(idx.groupby(to_groupby),
+                                 {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]})
 
             to_groupby = Index([datetime(2011, 11, 1),
                                 datetime(2011, 12, 1),
@@ -155,11 +155,10 @@ def test_index_groupby(self):
                                 datetime(2011, 11, 1)],
                                tz='UTC').values
 
-            ex_keys = pd.tslib.datetime_to_datetime64(np.array([Timestamp(
-                '2011-11-01'), Timestamp('2011-12-01')]))
-            expected = {ex_keys[0][0]: [idx[0], idx[5]],
-                        ex_keys[0][1]: [idx[1], idx[4]]}
-            self.assertEqual(idx.groupby(to_groupby), expected)
+            ex_keys = [Timestamp('2011-11-01'), Timestamp('2011-12-01')]
+            expected = {ex_keys[0]: idx[[0, 5]],
+                        ex_keys[1]: idx[[1, 4]]}
+            tm.assert_dict_equal(idx.groupby(to_groupby), expected)
 
     def test_modulo(self):
         # GH 9244
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index a21295e1a9823..01c1d48c6d5c0 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -521,13 +521,6 @@ def test_groupby_dict_mapping(self):
         assert_series_equal(result, result2)
         assert_series_equal(result, expected2)
 
-    def test_groupby_bounds_check(self):
-        # groupby_X is code-generated, so if one variant
-        # does, the rest probably do to
-        a = np.array([1, 2], dtype='object')
-        b = np.array([1, 2, 3], dtype='object')
-        self.assertRaises(AssertionError, pd.algos.groupby_object, a, b)
-
     def test_groupby_grouper_f_sanity_checked(self):
         dates = date_range('01-Jan-2013', periods=12, freq='MS')
         ts = Series(np.random.randn(12), index=dates)
@@ -3478,13 +3471,13 @@ def test_groupby_nat_exclude(self):
                            'str': [np.nan, 'a', np.nan, 'a', np.nan, 'a', np.nan, 'b']})
         grouped = df.groupby('dt')
 
-        expected = [[1, 7], [3, 5]]
+        expected = [pd.Index([1, 7]), pd.Index([3, 5])]
         keys = sorted(grouped.groups.keys())
         self.assertEqual(len(keys), 2)
         for k, e in zip(keys, expected):
             # grouped.groups keys are np.datetime64 with system tz
             # not to be affected by tz, only compare values
-            self.assertEqual(grouped.groups[k], e)
+            tm.assert_index_equal(grouped.groups[k], e)
 
         # confirm obj is not filtered
         tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df)
@@ -4447,7 +4440,7 @@ def test_multiindex_columns_empty_level(self):
 
         expected = df.groupby('to filter').groups
         result = df.groupby([('to filter', '')]).groups
-        self.assertEqual(result, expected)
+        tm.assert_dict_equal(result, expected)
 
     def test_cython_median(self):
         df = DataFrame(np.random.randn(1000))
diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py
index 9a12220f5b41d..a63ae5f7cf74e 100644
--- a/pandas/tests/types/test_inference.py
+++ b/pandas/tests/types/test_inference.py
@@ -16,7 +16,7 @@
 from pandas import lib, tslib
 from pandas import (Series, Index, DataFrame, Timedelta,
                     DatetimeIndex, TimedeltaIndex, Timestamp,
-                    Panel, Period)
+                    Panel, Period, Categorical)
 from pandas.compat import u, PY2, lrange
 from pandas.types import inference
 from pandas.types.common import (is_timedelta64_dtype,
@@ -26,7 +26,8 @@
     is_float,
     is_bool,
     is_scalar,
-    _ensure_int32)
+    _ensure_int32,
+    _ensure_categorical)
 from pandas.types.missing import isnull
 from pandas.util import testing as tm
 
@@ -842,6 +843,16 @@ def test_ensure_int32():
     assert (result.dtype == np.int32)
 
 
+def test_ensure_categorical():
+    values = np.arange(10, dtype=np.int32)
+    result = _ensure_categorical(values)
+    assert (result.dtype == 'category')
+
+    values = Categorical(values)
+    result = _ensure_categorical(values)
+    tm.assert_categorical_equal(result, values)
+
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
index 3b676b894d355..96213a4aec34d 100644
--- a/pandas/tseries/base.py
+++ b/pandas/tseries/base.py
@@ -226,10 +226,6 @@ def _box_values(self, values):
         """
         return lib.map_infer(values, self._box_func)
 
-    def groupby(self, f):
-        objs = self.asobject.values
-        return _algos.groupby_object(objs, f)
-
     def _format_with_header(self, header, **kwargs):
         return header + list(self._format_native_types(**kwargs))
 
diff --git a/pandas/types/common.py b/pandas/types/common.py
index 2e7a67112e6db..e0e4501738745 100644
--- a/pandas/types/common.py
+++ b/pandas/types/common.py
@@ -42,6 +42,13 @@ def _ensure_float(arr):
 _ensure_object = algos.ensure_object
 
 
+def _ensure_categorical(arr):
+    if not is_categorical(arr):
+        from pandas import Categorical
+        arr = Categorical(arr)
+    return arr
+
+
 def is_object_dtype(arr_or_dtype):
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.object_)
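Note: ``_ensure_categorical`` is the small piece of glue that lets the
non-categorical code paths reuse ``Categorical._reverse_indexer``. A usage
sketch against the 0.19-era internal layout (these are private APIs):

    import numpy as np
    from pandas.types.common import _ensure_categorical

    arr = np.array([3, 1, 3, 2])
    cat = _ensure_categorical(arr)    # ndarray -> Categorical
    cat._reverse_indexer()            # {1: array([1]), 2: array([3]), 3: array([0, 2])}

    cat2 = _ensure_categorical(cat)   # already categorical: passed through
    assert cat2 is cat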