Skip to content

Commit db3c6e4

Browse files
committed
clean/reorg tests
1 parent 205489b commit db3c6e4

File tree

3 files changed

+324
-299
lines changed

3 files changed

+324
-299
lines changed

pandas/tests/groupby/common.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,31 @@
11
""" Base setup """
22

3+
import pytest
34
import numpy as np
45
from pandas.util import testing as tm
56
from pandas import DataFrame, MultiIndex
67

78

9+
@pytest.fixture
10+
def mframe():
11+
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
12+
'three']],
13+
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
14+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
15+
names=['first', 'second'])
16+
return DataFrame(np.random.randn(10, 3), index=index,
17+
columns=['A', 'B', 'C'])
18+
19+
20+
@pytest.fixture
21+
def df():
22+
return DataFrame(
23+
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
24+
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
25+
'C': np.random.randn(8),
26+
'D': np.random.randn(8)})
27+
28+
829
class MixIn(object):
930

1031
def setUp(self):
@@ -15,26 +36,15 @@ def setUp(self):
1536
self.frame = DataFrame(self.seriesd)
1637
self.tsframe = DataFrame(self.tsd)
1738

18-
self.df = DataFrame(
19-
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
20-
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
21-
'C': np.random.randn(8),
22-
'D': np.random.randn(8)})
23-
39+
self.df = df()
2440
self.df_mixed_floats = DataFrame(
2541
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
2642
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
2743
'C': np.random.randn(8),
2844
'D': np.array(
2945
np.random.randn(8), dtype='float32')})
3046

31-
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
32-
'three']],
33-
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
34-
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
35-
names=['first', 'second'])
36-
self.mframe = DataFrame(np.random.randn(10, 3), index=index,
37-
columns=['A', 'B', 'C'])
47+
self.mframe = mframe()
3848

3949
self.three_group = DataFrame(
4050
{'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',

pandas/tests/groupby/test_groupby.py

Lines changed: 0 additions & 286 deletions
Original file line numberDiff line numberDiff line change
@@ -3706,292 +3706,6 @@ def test_index_label_overlaps_location(self):
37063706
expected = ser.take([1, 3, 4])
37073707
assert_series_equal(actual, expected)
37083708

3709-
def test_groupby_selection_with_methods(self):
3710-
# some methods which require DatetimeIndex
3711-
rng = pd.date_range('2014', periods=len(self.df))
3712-
self.df.index = rng
3713-
3714-
g = self.df.groupby(['A'])[['C']]
3715-
g_exp = self.df[['C']].groupby(self.df['A'])
3716-
# TODO check groupby with > 1 col ?
3717-
3718-
# methods which are called as .foo()
3719-
methods = ['count',
3720-
'corr',
3721-
'cummax',
3722-
'cummin',
3723-
'cumprod',
3724-
'describe',
3725-
'rank',
3726-
'quantile',
3727-
'diff',
3728-
'shift',
3729-
'all',
3730-
'any',
3731-
'idxmin',
3732-
'idxmax',
3733-
'ffill',
3734-
'bfill',
3735-
'pct_change',
3736-
'tshift']
3737-
3738-
for m in methods:
3739-
res = getattr(g, m)()
3740-
exp = getattr(g_exp, m)()
3741-
assert_frame_equal(res, exp) # should always be frames!
3742-
3743-
# methods which aren't just .foo()
3744-
assert_frame_equal(g.fillna(0), g_exp.fillna(0))
3745-
assert_frame_equal(g.dtypes, g_exp.dtypes)
3746-
assert_frame_equal(g.apply(lambda x: x.sum()),
3747-
g_exp.apply(lambda x: x.sum()))
3748-
3749-
assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean())
3750-
assert_frame_equal(g.resample('D').ohlc(),
3751-
g_exp.resample('D').ohlc())
3752-
3753-
assert_frame_equal(g.filter(lambda x: len(x) == 3),
3754-
g_exp.filter(lambda x: len(x) == 3))
3755-
3756-
# The methods returned by these attributes don't have a __name__ attribute
3757-
# that matches that attribute.
3758-
# TODO: Fix these inconsistencies
3759-
DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE = frozenset([
3760-
'boxplot',
3761-
'bfill',
3762-
'ffill'
3763-
])
3764-
S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE = frozenset([
3765-
'bfill',
3766-
'ffill'
3767-
])
3768-
3769-
def test_groupby_whitelist(self):
3770-
from string import ascii_lowercase
3771-
letters = np.array(list(ascii_lowercase))
3772-
N = 10
3773-
random_letters = letters.take(np.random.randint(0, 26, N))
3774-
df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
3775-
'letters': Series(random_letters)})
3776-
s = df.floats
3777-
3778-
df_whitelist = frozenset([
3779-
'last',
3780-
'first',
3781-
'mean',
3782-
'sum',
3783-
'min',
3784-
'max',
3785-
'head',
3786-
'tail',
3787-
'cumcount',
3788-
'resample',
3789-
'rank',
3790-
'quantile',
3791-
'fillna',
3792-
'mad',
3793-
'any',
3794-
'all',
3795-
'take',
3796-
'idxmax',
3797-
'idxmin',
3798-
'shift',
3799-
'tshift',
3800-
'ffill',
3801-
'bfill',
3802-
'pct_change',
3803-
'skew',
3804-
'plot',
3805-
'boxplot',
3806-
'hist',
3807-
'median',
3808-
'dtypes',
3809-
'corrwith',
3810-
'corr',
3811-
'cov',
3812-
'diff',
3813-
])
3814-
s_whitelist = frozenset([
3815-
'last',
3816-
'first',
3817-
'mean',
3818-
'sum',
3819-
'min',
3820-
'max',
3821-
'head',
3822-
'tail',
3823-
'cumcount',
3824-
'resample',
3825-
'rank',
3826-
'quantile',
3827-
'fillna',
3828-
'mad',
3829-
'any',
3830-
'all',
3831-
'take',
3832-
'idxmax',
3833-
'idxmin',
3834-
'shift',
3835-
'tshift',
3836-
'ffill',
3837-
'bfill',
3838-
'pct_change',
3839-
'skew',
3840-
'plot',
3841-
'hist',
3842-
'median',
3843-
'dtype',
3844-
'corr',
3845-
'cov',
3846-
'diff',
3847-
'unique',
3848-
'nlargest',
3849-
'nsmallest',
3850-
])
3851-
3852-
names_dont_match_pair = (
3853-
self.DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE,
3854-
self.S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE)
3855-
for obj, whitelist, names_dont_match in (
3856-
zip((df, s),
3857-
(df_whitelist, s_whitelist),
3858-
names_dont_match_pair)):
3859-
3860-
gb = obj.groupby(df.letters)
3861-
3862-
assert whitelist == gb._apply_whitelist
3863-
for m in whitelist:
3864-
f = getattr(type(gb), m)
3865-
3866-
# name
3867-
try:
3868-
n = f.__name__
3869-
except AttributeError:
3870-
continue
3871-
if m not in names_dont_match:
3872-
assert n == m
3873-
3874-
# qualname
3875-
if compat.PY3:
3876-
try:
3877-
n = f.__qualname__
3878-
except AttributeError:
3879-
continue
3880-
if m not in names_dont_match:
3881-
assert n.endswith(m)
3882-
3883-
def test_groupby_method_names_that_dont_match_attribute(self):
3884-
from string import ascii_lowercase
3885-
letters = np.array(list(ascii_lowercase))
3886-
N = 10
3887-
random_letters = letters.take(np.random.randint(0, 26, N))
3888-
df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
3889-
'letters': Series(random_letters)})
3890-
gb = df.groupby(df.letters)
3891-
s = df.floats
3892-
3893-
names_dont_match_pair = (
3894-
self.DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE,
3895-
self.S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE)
3896-
for obj, names_dont_match in zip((df, s), names_dont_match_pair):
3897-
gb = obj.groupby(df.letters)
3898-
for m in names_dont_match:
3899-
f = getattr(gb, m)
3900-
self.assertNotEqual(f.__name__, m)
3901-
3902-
AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
3903-
'mad', 'std', 'var', 'sem']
3904-
AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad']
3905-
3906-
def test_regression_whitelist_methods(self):
3907-
3908-
# GH6944
3909-
# explicitly test the whitelist methods
3910-
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
3911-
'three']],
3912-
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
3913-
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
3914-
names=['first', 'second'])
3915-
raw_frame = DataFrame(np.random.randn(10, 3), index=index,
3916-
columns=Index(['A', 'B', 'C'], name='exp'))
3917-
raw_frame.iloc[1, [1, 2]] = np.nan
3918-
raw_frame.iloc[7, [0, 1]] = np.nan
3919-
3920-
for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS,
3921-
lrange(2), lrange(2),
3922-
[True, False]):
3923-
3924-
if axis == 0:
3925-
frame = raw_frame
3926-
else:
3927-
frame = raw_frame.T
3928-
3929-
if op in self.AGG_FUNCTIONS_WITH_SKIPNA:
3930-
grouped = frame.groupby(level=level, axis=axis)
3931-
result = getattr(grouped, op)(skipna=skipna)
3932-
expected = getattr(frame, op)(level=level, axis=axis,
3933-
skipna=skipna)
3934-
assert_frame_equal(result, expected)
3935-
else:
3936-
grouped = frame.groupby(level=level, axis=axis)
3937-
result = getattr(grouped, op)()
3938-
expected = getattr(frame, op)(level=level, axis=axis)
3939-
assert_frame_equal(result, expected)
3940-
3941-
def test_groupby_blacklist(self):
3942-
from string import ascii_lowercase
3943-
letters = np.array(list(ascii_lowercase))
3944-
N = 10
3945-
random_letters = letters.take(np.random.randint(0, 26, N))
3946-
df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
3947-
'letters': Series(random_letters)})
3948-
s = df.floats
3949-
3950-
blacklist = [
3951-
'eval', 'query', 'abs', 'where',
3952-
'mask', 'align', 'groupby', 'clip', 'astype',
3953-
'at', 'combine', 'consolidate', 'convert_objects',
3954-
]
3955-
to_methods = [method for method in dir(df) if method.startswith('to_')]
3956-
3957-
blacklist.extend(to_methods)
3958-
3959-
# e.g., to_csv
3960-
defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the "
3961-
"'apply' method$)")
3962-
3963-
# e.g., query, eval
3964-
not_defined = "(?:^{1!r} object has no attribute {0!r}$)"
3965-
fmt = defined_but_not_allowed + '|' + not_defined
3966-
for bl in blacklist:
3967-
for obj in (df, s):
3968-
gb = obj.groupby(df.letters)
3969-
msg = fmt.format(bl, type(gb).__name__)
3970-
with tm.assertRaisesRegexp(AttributeError, msg):
3971-
getattr(gb, bl)
3972-
3973-
def test_tab_completion(self):
3974-
grp = self.mframe.groupby(level='second')
3975-
results = set([v for v in dir(grp) if not v.startswith('_')])
3976-
expected = set(
3977-
['A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter',
3978-
'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max',
3979-
'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot',
3980-
'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count',
3981-
'nunique', 'head', 'describe', 'cummax', 'quantile',
3982-
'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna',
3983-
'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill',
3984-
'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith',
3985-
'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin',
3986-
'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding'])
3987-
self.assertEqual(results, expected)
3988-
3989-
def test_groupby_function_rename(self):
3990-
grp = self.mframe.groupby(level='second')
3991-
for name in ['sum', 'prod', 'min', 'max', 'first', 'last']:
3992-
f = getattr(grp, name)
3993-
self.assertEqual(f.__name__, name)
3994-
39953709
def test_lower_int_prec_count(self):
39963710
df = DataFrame({'a': np.array(
39973711
[0, 1, 2, 100], np.int8),

0 commit comments

Comments
 (0)