From 68c7064a851094c24b99870b49db51d2e797f6cf Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Tue, 17 Dec 2019 21:47:35 +0000
Subject: [PATCH 1/3] move NDFrame.groupby to (DataFrame|Series).groupby

---
 pandas/core/frame.py   | 79 +++++++++++++++++++++++++++++++++++++-
 pandas/core/generic.py | 86 ++++--------------------------------------
 pandas/core/series.py  | 85 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 169 insertions(+), 81 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1de0d3b58dc5f..bef6d90f00ba8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -94,7 +94,8 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas.core import algorithms, common as com, nanops, ops
+from pandas._typing import Axes, Dtype, FilePathOrBuffer
+from pandas.core import algorithms, common as com, groupby, nanops, ops
 from pandas.core.accessor import CachedAccessor
 from pandas.core.arrays import Categorical, ExtensionArray
 from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
@@ -5598,6 +5599,82 @@ def update(
 
     # ----------------------------------------------------------------------
     # Data reshaping
+    @Appender(
+        """
+Examples
+--------
+>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
+...                               'Parrot', 'Parrot'],
+...                    'Max Speed': [380., 370., 24., 26.]})
+>>> df
+   Animal  Max Speed
+0  Falcon      380.0
+1  Falcon      370.0
+2  Parrot       24.0
+3  Parrot       26.0
+>>> df.groupby(['Animal']).mean()
+        Max Speed
+Animal
+Falcon      375.0
+Parrot       25.0
+
+**Hierarchical Indexes**
+
+We can group by different levels of a hierarchical index
+using the `level` parameter:
+
+>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+...           ['Captive', 'Wild', 'Captive', 'Wild']]
+>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
+>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
+...                   index=index)
+>>> df
+                Max Speed
+Animal Type
+Falcon Captive      390.0
+       Wild         350.0
+Parrot Captive       30.0
+       Wild          20.0
+>>> df.groupby(level=0).mean()
+        Max Speed
+Animal
+Falcon      370.0
+Parrot       25.0
+>>> df.groupby(level="Type").mean()
+         Max Speed
+Type
+Captive      210.0
+Wild         185.0
+"""
+    )
+    @Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
+    def groupby(
+        self,
+        by=None,
+        axis=0,
+        level=None,
+        as_index: bool = True,
+        sort: bool = True,
+        group_keys: bool = True,
+        squeeze: bool = False,
+        observed: bool = False,
+    ) -> "groupby.DataFrameGroupBy":
+
+        if level is None and by is None:
+            raise TypeError("You have to supply one of 'by' and 'level'")
+        axis = self._get_axis_number(axis)
+
+        return groupby.DataFrameGroupBy(
+            obj=self,
+            keys=by,
+            axis=axis,
+            level=level,
+            as_index=as_index,
+            sort=sort,
+            group_keys=group_keys,
+            squeeze=squeeze,
+            observed=observed,
+        )
 
     _shared_docs[
         "pivot"
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b896721469f1f..d8da0af413eca 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7273,19 +7273,10 @@ def clip(
 
         return result
 
-    def groupby(
-        self,
-        by=None,
-        axis=0,
-        level=None,
-        as_index: bool_t = True,
-        sort: bool_t = True,
-        group_keys: bool_t = True,
-        squeeze: bool_t = False,
-        observed: bool_t = False,
-    ):
-        """
-        Group DataFrame or Series using a mapper or by a Series of columns.
+    _shared_docs[
+        "groupby"
+    ] = """
+        Group %(klass)s using a mapper or by a Series of columns.
 
         A groupby operation involves some combination of splitting the
         object, applying a function, and combining the results. This can be
@@ -7330,9 +7321,8 @@ def groupby(
 
         Returns
         -------
-        DataFrameGroupBy or SeriesGroupBy
-            Depends on the calling object and returns groupby object that
-            contains information about the groups.
+        %(klass)sGroupBy
+            Returns a groupby object that contains information about the groups.
 
         See Also
         --------
@@ -7343,69 +7333,7 @@ def groupby(
         -----
         See the `user guide
         <http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
-
-        Examples
-        --------
-        >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
-        ...                               'Parrot', 'Parrot'],
-        ...                    'Max Speed': [380., 370., 24., 26.]})
-        >>> df
-           Animal  Max Speed
-        0  Falcon      380.0
-        1  Falcon      370.0
-        2  Parrot       24.0
-        3  Parrot       26.0
-        >>> df.groupby(['Animal']).mean()
-                Max Speed
-        Animal
-        Falcon      375.0
-        Parrot       25.0
-
-        **Hierarchical Indexes**
-
-        We can groupby different levels of a hierarchical index
-        using the `level` parameter:
-
-        >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
-        ...           ['Captive', 'Wild', 'Captive', 'Wild']]
-        >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
-        >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
-        ...                   index=index)
-        >>> df
-                        Max Speed
-        Animal Type
-        Falcon Captive      390.0
-               Wild         350.0
-        Parrot Captive       30.0
-               Wild          20.0
-        >>> df.groupby(level=0).mean()
-                Max Speed
-        Animal
-        Falcon      370.0
-        Parrot       25.0
-        >>> df.groupby(level=1).mean()
-                 Max Speed
-        Type
-        Captive      210.0
-        Wild         185.0
-        """
-        from pandas.core.groupby.groupby import get_groupby
-
-        if level is None and by is None:
-            raise TypeError("You have to supply one of 'by' and 'level'")
-        axis = self._get_axis_number(axis)
-
-        return get_groupby(
-            self,
-            by=by,
-            axis=axis,
-            level=level,
-            as_index=as_index,
-            sort=sort,
-            group_keys=group_keys,
-            squeeze=squeeze,
-            observed=observed,
-        )
+        """
 
     def asfreq(
         self,
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 36e26e088935c..1c1c548b1f434 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -47,7 +47,7 @@
 )
 
 import pandas as pd
-from pandas.core import algorithms, base, generic, nanops, ops
+from pandas.core import algorithms, base, generic, groupby, nanops, ops
 from pandas.core.accessor import CachedAccessor
 from pandas.core.arrays import ExtensionArray, try_cast_to_ea
 from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
@@ -1568,6 +1568,89 @@ def _set_name(self, name, inplace=False):
         ser.name = name
         return ser
 
+    @Appender(
+        """
+Examples
+--------
+>>> ser = pd.Series([390., 350., 30., 20.],
+...                 index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
+>>> ser
+Falcon    390.0
+Falcon    350.0
+Parrot     30.0
+Parrot     20.0
+Name: Max Speed, dtype: float64
+>>> ser.groupby(["a", "b", "a", "b"]).mean()
+a    210.0
+b    185.0
+Name: Max Speed, dtype: float64
+>>> ser.groupby(level=0).mean()
+Falcon    370.0
+Parrot     25.0
+Name: Max Speed, dtype: float64
+>>> ser.groupby(ser > 100).mean()
+Max Speed
+False     25.0
+True     370.0
+Name: Max Speed, dtype: float64
+
+**Grouping by Indexes**
+
+We can group by different levels of a hierarchical index
+using the `level` parameter:
+
+>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+...           ['Captive', 'Wild', 'Captive', 'Wild']]
+>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
+>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
+>>> ser
+Animal  Type
+Falcon  Captive    390.0
+        Wild       350.0
+Parrot  Captive     30.0
+        Wild        20.0
+Name: Max Speed, dtype: float64
+>>> ser.groupby(level=0).mean()
+Animal
+Falcon    370.0
+Parrot     25.0
+Name: Max Speed, dtype: float64
+>>> ser.groupby(level="Type").mean()
+Type
+Captive    210.0
+Wild       185.0
+Name: Max Speed, dtype: float64
+"""
+    )
+    @Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
+    def groupby(
+        self,
+        by=None,
+        axis=0,
+        level=None,
+        as_index: bool = True,
+        sort: bool = True,
+        group_keys: bool = True,
+        squeeze: bool = False,
+        observed: bool = False,
+    ) -> "groupby.SeriesGroupBy":
+
+        if level is None and by is None:
+            raise TypeError("You have to supply one of 'by' and 'level'")
+        axis = self._get_axis_number(axis)
+
+        return groupby.SeriesGroupBy(
+            obj=self,
+            keys=by,
+            axis=axis,
+            level=level,
+            as_index=as_index,
+            sort=sort,
+            group_keys=group_keys,
+            squeeze=squeeze,
+            observed=observed,
+        )
+
     # ----------------------------------------------------------------------
     # Statistics, overridden ndarray methods
 

From 7a2e0837b62bbf602344926a8296168b80b359db Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Wed, 18 Dec 2019 02:20:21 +0000
Subject: [PATCH 2/3] fix mypy errors in groupby type annotations

---
 pandas/core/frame.py         | 7 ++++---
 pandas/core/reshape/merge.py | 5 ++++-
 pandas/core/series.py        | 7 ++++---
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bef6d90f00ba8..0c084250b6fe9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -95,12 +95,13 @@
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas._typing import Axes, Dtype, FilePathOrBuffer
-from pandas.core import algorithms, common as com, groupby, nanops, ops
+from pandas.core import algorithms, common as com, nanops, ops
 from pandas.core.accessor import CachedAccessor
 from pandas.core.arrays import Categorical, ExtensionArray
 from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
 from pandas.core.arrays.sparse import SparseFrameAccessor
 from pandas.core.generic import NDFrame, _shared_docs
+from pandas.core.groupby import generic as grp_generic
 from pandas.core.indexes import base as ibase
 from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
 from pandas.core.indexes.datetimes import DatetimeIndex
@@ -5658,13 +5659,13 @@ def groupby(
         group_keys: bool = True,
         squeeze: bool = False,
         observed: bool = False,
-    ) -> "groupby.DataFrameGroupBy":
+    ) -> "grp_generic.DataFrameGroupBy":
 
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
 
-        return groupby.DataFrameGroupBy(
+        return grp_generic.DataFrameGroupBy(
             obj=self,
             keys=by,
             axis=axis,
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 37ec05c40940e..6b46c6c534058 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -41,6 +41,8 @@
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
 
 from pandas import Categorical, Index, MultiIndex
+from pandas._typing import FrameOrSeries
+from pandas.core import groupby
 import pandas.core.algorithms as algos
 from pandas.core.arrays.categorical import _recode_for_categories
 import pandas.core.common as com
@@ -113,6 +115,7 @@ def _groupby_and_merge(
         by = [by]
 
     lby = left.groupby(by, sort=False)
+    rby: Optional[groupby.DataFrameGroupBy] = None
 
     # if we can groupby the rhs
     # then we can get vastly better perf
@@ -132,7 +135,7 @@ def _groupby_and_merge(
     try:
         rby = right.groupby(by, sort=False)
     except KeyError:
-        rby = None
+        pass
 
     for key, lhs in lby:
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1c1c548b1f434..1336c6b58e1c2 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -47,7 +47,7 @@
 )
 
 import pandas as pd
-from pandas.core import algorithms, base, generic, groupby, nanops, ops
+from pandas.core import algorithms, base, generic, nanops, ops
 from pandas.core.accessor import CachedAccessor
 from pandas.core.arrays import ExtensionArray, try_cast_to_ea
 from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
@@ -60,6 +60,7 @@
     sanitize_array,
 )
 from pandas.core.generic import _shared_docs
+from pandas.core.groupby import generic as grp_generic
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
@@ -1633,13 +1634,13 @@ def groupby(
         group_keys: bool = True,
         squeeze: bool = False,
         observed: bool = False,
-    ) -> "groupby.SeriesGroupBy":
+    ) -> "grp_generic.SeriesGroupBy":
 
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
 
-        return groupby.SeriesGroupBy(
+        return grp_generic.SeriesGroupBy(
             obj=self,
             keys=by,
             axis=axis,

From 9ada87c12d855348767ec384113121f37724d051 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Wed, 18 Dec 2019 20:36:02 +0000
Subject: [PATCH 3/3] rename grp_generic to groupby_generic

---
 pandas/core/frame.py         | 7 +++----
 pandas/core/reshape/merge.py | 1 -
 pandas/core/series.py        | 9 ++++-----
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0c084250b6fe9..7d71c3bfb0368 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -94,14 +94,13 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas._typing import Axes, Dtype, FilePathOrBuffer
 from pandas.core import algorithms, common as com, nanops, ops
 from pandas.core.accessor import CachedAccessor
 from pandas.core.arrays import Categorical, ExtensionArray
 from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
 from pandas.core.arrays.sparse import SparseFrameAccessor
 from pandas.core.generic import NDFrame, _shared_docs
-from pandas.core.groupby import generic as grp_generic
+from pandas.core.groupby import generic as groupby_generic
 from pandas.core.indexes import base as ibase
 from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
 from pandas.core.indexes.datetimes import DatetimeIndex
@@ -5659,13 +5658,13 @@ def groupby(
         group_keys: bool = True,
         squeeze: bool = False,
         observed: bool = False,
-    ) -> "grp_generic.DataFrameGroupBy":
+    ) -> "groupby_generic.DataFrameGroupBy":
 
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
 
-        return grp_generic.DataFrameGroupBy(
+        return groupby_generic.DataFrameGroupBy(
             obj=self,
             keys=by,
             axis=axis,
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 6b46c6c534058..a7471cc646777 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -41,7 +41,6 @@
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
 
 from pandas import Categorical, Index, MultiIndex
-from pandas._typing import FrameOrSeries
 from pandas.core import groupby
 import pandas.core.algorithms as algos
 from pandas.core.arrays.categorical import _recode_for_categories
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1336c6b58e1c2..aa5af9bb893fa 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -59,8 +59,7 @@
     is_empty_data,
     sanitize_array,
 )
-from pandas.core.generic import _shared_docs
-from pandas.core.groupby import generic as grp_generic
+from pandas.core.groupby import generic as groupby_generic
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
@@ -1432,7 +1431,7 @@ def to_string(
         """
     )
     @Substitution(klass="Series")
-    @Appender(_shared_docs["to_markdown"])
+    @Appender(generic._shared_docs["to_markdown"])
     def to_markdown(
         self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
     ) -> Optional[str]:
@@ -1634,13 +1633,13 @@ def groupby(
         group_keys: bool = True,
         squeeze: bool = False,
         observed: bool = False,
-    ) -> "grp_generic.SeriesGroupBy":
+    ) -> "groupby_generic.SeriesGroupBy":
 
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
 
-        return grp_generic.SeriesGroupBy(
+        return groupby_generic.SeriesGroupBy(
             obj=self,
             keys=by,
             axis=axis,