clean/reorg groupby tests: add fixtures, split out whitelist tests

jreback · jreback · commit db3c6e4645f2 · 2017-03-28T17:37:06.000-04:00
diff --git a/pandas/tests/groupby/common.py b/pandas/tests/groupby/common.py
@@ -1,10 +1,31 @@
 """ Base setup """
 
+import pytest
 import numpy as np
 from pandas.util import testing as tm
 from pandas import DataFrame, MultiIndex
 
 
+@pytest.fixture
+def mframe():
+    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
+                                                              'three']],
+                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
+                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
+                       names=['first', 'second'])
+    return DataFrame(np.random.randn(10, 3), index=index,
+                     columns=['A', 'B', 'C'])
+
+
+@pytest.fixture
+def df():
+    return DataFrame(
+        {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
+         'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
+         'C': np.random.randn(8),
+         'D': np.random.randn(8)})
+
+
 class MixIn(object):
 
     def setUp(self):
@@ -15,26 +36,15 @@ def setUp(self):
         self.frame = DataFrame(self.seriesd)
         self.tsframe = DataFrame(self.tsd)
 
-        self.df = DataFrame(
-            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
-             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
-             'C': np.random.randn(8),
-             'D': np.random.randn(8)})
-
+        self.df = df()
         self.df_mixed_floats = DataFrame(
             {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
              'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
              'C': np.random.randn(8),
              'D': np.array(
                  np.random.randn(8), dtype='float32')})
 
-        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
-                                                                  'three']],
-                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
-                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
-                           names=['first', 'second'])
-        self.mframe = DataFrame(np.random.randn(10, 3), index=index,
-                                columns=['A', 'B', 'C'])
+        self.mframe = mframe()
 
         self.three_group = DataFrame(
             {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -3706,292 +3706,6 @@ def test_index_label_overlaps_location(self):
         expected = ser.take([1, 3, 4])
         assert_series_equal(actual, expected)
 
-    def test_groupby_selection_with_methods(self):
-        # some methods which require DatetimeIndex
-        rng = pd.date_range('2014', periods=len(self.df))
-        self.df.index = rng
-
-        g = self.df.groupby(['A'])[['C']]
-        g_exp = self.df[['C']].groupby(self.df['A'])
-        # TODO check groupby with > 1 col ?
-
-        # methods which are called as .foo()
-        methods = ['count',
-                   'corr',
-                   'cummax',
-                   'cummin',
-                   'cumprod',
-                   'describe',
-                   'rank',
-                   'quantile',
-                   'diff',
-                   'shift',
-                   'all',
-                   'any',
-                   'idxmin',
-                   'idxmax',
-                   'ffill',
-                   'bfill',
-                   'pct_change',
-                   'tshift']
-
-        for m in methods:
-            res = getattr(g, m)()
-            exp = getattr(g_exp, m)()
-            assert_frame_equal(res, exp)  # should always be frames!
-
-        # methods which aren't just .foo()
-        assert_frame_equal(g.fillna(0), g_exp.fillna(0))
-        assert_frame_equal(g.dtypes, g_exp.dtypes)
-        assert_frame_equal(g.apply(lambda x: x.sum()),
-                           g_exp.apply(lambda x: x.sum()))
-
-        assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean())
-        assert_frame_equal(g.resample('D').ohlc(),
-                           g_exp.resample('D').ohlc())
-
-        assert_frame_equal(g.filter(lambda x: len(x) == 3),
-                           g_exp.filter(lambda x: len(x) == 3))
-
-    # The methods returned by these attributes don't have a __name__ attribute
-    # that matches that attribute.
-    # TODO: Fix these inconsistencies
-    DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE = frozenset([
-        'boxplot',
-        'bfill',
-        'ffill'
-    ])
-    S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE = frozenset([
-        'bfill',
-        'ffill'
-    ])
-
-    def test_groupby_whitelist(self):
-        from string import ascii_lowercase
-        letters = np.array(list(ascii_lowercase))
-        N = 10
-        random_letters = letters.take(np.random.randint(0, 26, N))
-        df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
-                        'letters': Series(random_letters)})
-        s = df.floats
-
-        df_whitelist = frozenset([
-            'last',
-            'first',
-            'mean',
-            'sum',
-            'min',
-            'max',
-            'head',
-            'tail',
-            'cumcount',
-            'resample',
-            'rank',
-            'quantile',
-            'fillna',
-            'mad',
-            'any',
-            'all',
-            'take',
-            'idxmax',
-            'idxmin',
-            'shift',
-            'tshift',
-            'ffill',
-            'bfill',
-            'pct_change',
-            'skew',
-            'plot',
-            'boxplot',
-            'hist',
-            'median',
-            'dtypes',
-            'corrwith',
-            'corr',
-            'cov',
-            'diff',
-        ])
-        s_whitelist = frozenset([
-            'last',
-            'first',
-            'mean',
-            'sum',
-            'min',
-            'max',
-            'head',
-            'tail',
-            'cumcount',
-            'resample',
-            'rank',
-            'quantile',
-            'fillna',
-            'mad',
-            'any',
-            'all',
-            'take',
-            'idxmax',
-            'idxmin',
-            'shift',
-            'tshift',
-            'ffill',
-            'bfill',
-            'pct_change',
-            'skew',
-            'plot',
-            'hist',
-            'median',
-            'dtype',
-            'corr',
-            'cov',
-            'diff',
-            'unique',
-            'nlargest',
-            'nsmallest',
-        ])
-
-        names_dont_match_pair = (
-            self.DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE,
-            self.S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE)
-        for obj, whitelist, names_dont_match in (
-                zip((df, s),
-                    (df_whitelist, s_whitelist),
-                    names_dont_match_pair)):
-
-            gb = obj.groupby(df.letters)
-
-            assert whitelist == gb._apply_whitelist
-            for m in whitelist:
-                f = getattr(type(gb), m)
-
-                # name
-                try:
-                    n = f.__name__
-                except AttributeError:
-                    continue
-                if m not in names_dont_match:
-                    assert n == m
-
-                # qualname
-                if compat.PY3:
-                    try:
-                        n = f.__qualname__
-                    except AttributeError:
-                        continue
-                    if m not in names_dont_match:
-                        assert n.endswith(m)
-
-    def test_groupby_method_names_that_dont_match_attribute(self):
-        from string import ascii_lowercase
-        letters = np.array(list(ascii_lowercase))
-        N = 10
-        random_letters = letters.take(np.random.randint(0, 26, N))
-        df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
-                        'letters': Series(random_letters)})
-        gb = df.groupby(df.letters)
-        s = df.floats
-
-        names_dont_match_pair = (
-            self.DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE,
-            self.S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE)
-        for obj, names_dont_match in zip((df, s), names_dont_match_pair):
-            gb = obj.groupby(df.letters)
-            for m in names_dont_match:
-                f = getattr(gb, m)
-                self.assertNotEqual(f.__name__, m)
-
-    AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
-                     'mad', 'std', 'var', 'sem']
-    AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad']
-
-    def test_regression_whitelist_methods(self):
-
-        # GH6944
-        # explicity test the whitelest methods
-        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
-                                                                  'three']],
-                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
-                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
-                           names=['first', 'second'])
-        raw_frame = DataFrame(np.random.randn(10, 3), index=index,
-                              columns=Index(['A', 'B', 'C'], name='exp'))
-        raw_frame.iloc[1, [1, 2]] = np.nan
-        raw_frame.iloc[7, [0, 1]] = np.nan
-
-        for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS,
-                                                    lrange(2), lrange(2),
-                                                    [True, False]):
-
-            if axis == 0:
-                frame = raw_frame
-            else:
-                frame = raw_frame.T
-
-            if op in self.AGG_FUNCTIONS_WITH_SKIPNA:
-                grouped = frame.groupby(level=level, axis=axis)
-                result = getattr(grouped, op)(skipna=skipna)
-                expected = getattr(frame, op)(level=level, axis=axis,
-                                              skipna=skipna)
-                assert_frame_equal(result, expected)
-            else:
-                grouped = frame.groupby(level=level, axis=axis)
-                result = getattr(grouped, op)()
-                expected = getattr(frame, op)(level=level, axis=axis)
-                assert_frame_equal(result, expected)
-
-    def test_groupby_blacklist(self):
-        from string import ascii_lowercase
-        letters = np.array(list(ascii_lowercase))
-        N = 10
-        random_letters = letters.take(np.random.randint(0, 26, N))
-        df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
-                        'letters': Series(random_letters)})
-        s = df.floats
-
-        blacklist = [
-            'eval', 'query', 'abs', 'where',
-            'mask', 'align', 'groupby', 'clip', 'astype',
-            'at', 'combine', 'consolidate', 'convert_objects',
-        ]
-        to_methods = [method for method in dir(df) if method.startswith('to_')]
-
-        blacklist.extend(to_methods)
-
-        # e.g., to_csv
-        defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the "
-                                   "'apply' method$)")
-
-        # e.g., query, eval
-        not_defined = "(?:^{1!r} object has no attribute {0!r}$)"
-        fmt = defined_but_not_allowed + '|' + not_defined
-        for bl in blacklist:
-            for obj in (df, s):
-                gb = obj.groupby(df.letters)
-                msg = fmt.format(bl, type(gb).__name__)
-                with tm.assertRaisesRegexp(AttributeError, msg):
-                    getattr(gb, bl)
-
-    def test_tab_completion(self):
-        grp = self.mframe.groupby(level='second')
-        results = set([v for v in dir(grp) if not v.startswith('_')])
-        expected = set(
-            ['A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter',
-             'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max',
-             'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot',
-             'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count',
-             'nunique', 'head', 'describe', 'cummax', 'quantile',
-             'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna',
-             'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill',
-             'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith',
-             'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin',
-             'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding'])
-        self.assertEqual(results, expected)
-
-    def test_groupby_function_rename(self):
-        grp = self.mframe.groupby(level='second')
-        for name in ['sum', 'prod', 'min', 'max', 'first', 'last']:
-            f = getattr(grp, name)
-            self.assertEqual(f.__name__, name)
-
     def test_lower_int_prec_count(self):
         df = DataFrame({'a': np.array(
             [0, 1, 2, 100], np.int8),
diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py