From d0131030efef013458b0607cafa62783317ca206 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 14 Jan 2019 09:50:52 -0800 Subject: [PATCH 1/8] Stop using singleton fixtures --- pandas/tests/frame/conftest.py | 221 --------------------- pandas/tests/frame/test_alter_axes.py | 79 +++++--- pandas/tests/frame/test_analytics.py | 215 +++++++++++++++----- pandas/tests/frame/test_api.py | 97 ++++++--- pandas/tests/frame/test_apply.py | 75 +++++-- pandas/tests/frame/test_arithmetic.py | 36 ++-- pandas/tests/frame/test_block_internals.py | 56 ++++-- pandas/util/testing.py | 117 ++++++++++- 8 files changed, 521 insertions(+), 375 deletions(-) delete mode 100644 pandas/tests/frame/conftest.py diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py deleted file mode 100644 index 377e737a53158..0000000000000 --- a/pandas/tests/frame/conftest.py +++ /dev/null @@ -1,221 +0,0 @@ -import numpy as np -import pytest - -from pandas import DataFrame, NaT, compat, date_range -import pandas.util.testing as tm - - -@pytest.fixture -def float_frame(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - return DataFrame(tm.getSeriesData()) - - -@pytest.fixture -def float_frame_with_na(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['A', 'B', 'C', 'D']; some entries are missing - """ - df = DataFrame(tm.getSeriesData()) - # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan - return df - - -@pytest.fixture -def float_frame2(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['D', 'C', 'B', 'A'] - """ - return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) - - -@pytest.fixture -def bool_frame_with_na(): - """ - Fixture for DataFrame of booleans with index of unique strings - - Columns are ['A', 'B', 'C', 'D']; some entries are missing - """ - df = DataFrame(tm.getSeriesData()) > 0 - df = df.astype(object) - # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan - return df - - -@pytest.fixture -def int_frame(): - """ - Fixture for DataFrame of ints with index of unique strings - - Columns are ['A', 'B', 'C', 'D'] - """ - df = DataFrame({k: v.astype(int) - for k, v in compat.iteritems(tm.getSeriesData())}) - # force these all to int64 to avoid platform testing issues - return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64) - - -@pytest.fixture -def datetime_frame(): - """ - Fixture for DataFrame of floats with DatetimeIndex - - Columns are ['A', 'B', 'C', 'D'] - """ - return DataFrame(tm.getTimeSeriesData()) - - -@pytest.fixture -def float_string_frame(): - """ - Fixture for DataFrame of floats and strings with index of unique strings - - Columns are ['A', 'B', 'C', 'D', 'foo']. - """ - df = DataFrame(tm.getSeriesData()) - df['foo'] = 'bar' - return df - - -@pytest.fixture -def mixed_float_frame(): - """ - Fixture for DataFrame of different float types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame(tm.getSeriesData()) - df.A = df.A.astype('float32') - df.B = df.B.astype('float32') - df.C = df.C.astype('float16') - df.D = df.D.astype('float64') - return df - - -@pytest.fixture -def mixed_float_frame2(): - """ - Fixture for DataFrame of different float types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame(tm.getSeriesData()) - df.D = df.D.astype('float32') - df.C = df.C.astype('float32') - df.B = df.B.astype('float16') - df.D = df.D.astype('float64') - return df - - -@pytest.fixture -def mixed_int_frame(): - """ - Fixture for DataFrame of different int types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame({k: v.astype(int) - for k, v in compat.iteritems(tm.getSeriesData())}) - df.A = df.A.astype('int32') - df.B = np.ones(len(df.B), dtype='uint64') - df.C = df.C.astype('uint8') - df.D = df.C.astype('int64') - return df - - -@pytest.fixture -def mixed_type_frame(): - """ - Fixture for DataFrame of float/int/string columns with RangeIndex - - Columns are ['a', 'b', 'c', 'float32', 'int32']. - """ - return DataFrame({'a': 1., 'b': 2, 'c': 'foo', - 'float32': np.array([1.] * 10, dtype='float32'), - 'int32': np.array([1] * 10, dtype='int32')}, - index=np.arange(10)) - - -@pytest.fixture -def timezone_frame(): - """ - Fixture for DataFrame of date_range Series with different time zones - - Columns are ['A', 'B', 'C']; some entries are missing - """ - df = DataFrame({'A': date_range('20130101', periods=3), - 'B': date_range('20130101', periods=3, - tz='US/Eastern'), - 'C': date_range('20130101', periods=3, - tz='CET')}) - df.iloc[1, 1] = NaT - df.iloc[1, 2] = NaT - return df - - -@pytest.fixture -def empty_frame(): - """ - Fixture for empty DataFrame - """ - return DataFrame({}) - - -@pytest.fixture -def datetime_series(): - """ - Fixture for Series of floats with DatetimeIndex - """ - return tm.makeTimeSeries(nper=30) - - -@pytest.fixture -def datetime_series_short(): - """ - Fixture for Series of floats with DatetimeIndex - """ - return tm.makeTimeSeries(nper=30)[5:] - - -@pytest.fixture -def simple_frame(): - """ - Fixture for simple 3x3 DataFrame - - Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. - """ - arr = np.array([[1., 2., 3.], - [4., 5., 6.], - [7., 8., 9.]]) - - return DataFrame(arr, columns=['one', 'two', 'three'], - index=['a', 'b', 'c']) - - -@pytest.fixture -def frame_of_index_cols(): - """ - Fixture for DataFrame of columns that can be used for indexing - - Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; - 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. - """ - df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5), - ('tuple', 'as', 'label'): np.random.randn(5)}) - return df diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index b63151dfb459e..ab633e022540e 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -21,7 +21,9 @@ class TestDataFrameAlterAxes(): - def test_set_index_directly(self, float_string_frame): + def test_set_index_directly(self): + float_string_frame = tm.get_float_string_frame() + df = float_string_frame idx = Index(np.arange(len(df))[::-1]) @@ -30,7 +32,9 @@ def test_set_index_directly(self, float_string_frame): with pytest.raises(ValueError, match='Length mismatch'): df.index = idx[::2] - def test_set_index(self, float_string_frame): + def test_set_index(self): + float_string_frame = tm.get_float_string_frame() + df = float_string_frame idx = Index(np.arange(len(df))[::-1]) @@ -51,9 +55,8 @@ def test_set_index_cast(self): ('tuple', 'as', 'label')]) @pytest.mark.parametrize('inplace', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_drop_inplace(self, frame_of_index_cols, - drop, inplace, keys): - df = frame_of_index_cols + def test_set_index_drop_inplace(self, drop, inplace, keys): + df = tm.get_frame_of_index_cols() if isinstance(keys, list): idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys) @@ -74,8 +77,8 @@ def test_set_index_drop_inplace(self, frame_of_index_cols, @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'], ('tuple', 'as', 'label')]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append(self, frame_of_index_cols, drop, keys): - df = frame_of_index_cols + def test_set_index_append(self, drop, keys): + df = tm.get_frame_of_index_cols() keys = keys if isinstance(keys, list) else [keys] idx = MultiIndex.from_arrays([df.index] + [df[x] for x in keys], @@ -91,9 +94,9 @@ def test_set_index_append(self, frame_of_index_cols, drop, keys): @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'], ('tuple', 'as', 'label')]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append_to_multiindex(self, frame_of_index_cols, - drop, keys): + def test_set_index_append_to_multiindex(self, drop, keys): # append to existing multiindex + frame_of_index_cols = tm.get_frame_of_index_cols() df = frame_of_index_cols.set_index(['D'], drop=drop, append=True) keys = keys if isinstance(keys, list) else [keys] @@ -123,9 +126,8 @@ def test_set_index_after_mutation(self): @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_single_array(self, frame_of_index_cols, - drop, append, index_name, box): - df = frame_of_index_cols + def test_set_index_pass_single_array(self, drop, append, index_name, box): + df = tm.get_frame_of_index_cols() df.index.name = index_name key = box(df['B']) @@ -157,9 +159,8 @@ def test_set_index_pass_single_array(self, frame_of_index_cols, [(True, None), (True, 'A'), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays(self, frame_of_index_cols, - drop, append, index_name, box): - df = frame_of_index_cols + def test_set_index_pass_arrays(self, drop, append, index_name, box): + df = tm.get_frame_of_index_cols() df.index.name = index_name keys = ['A', box(df['B'])] @@ -190,9 +191,9 @@ def test_set_index_pass_arrays(self, frame_of_index_cols, @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, + def test_set_index_pass_arrays_duplicate(self, drop, append, index_name, box1, box2): - df = frame_of_index_cols + df = tm.get_frame_of_index_cols() df.index.name = index_name keys = [box1(df['A']), box2(df['A'])] @@ -215,9 +216,8 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_multiindex(self, frame_of_index_cols, - drop, append): - df = frame_of_index_cols + def test_set_index_pass_multiindex(self, drop, append): + df = tm.get_frame_of_index_cols() keys = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) result = df.set_index(keys, drop=drop, append=append) @@ -227,8 +227,8 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, tm.assert_frame_equal(result, expected) - def test_set_index_verify_integrity(self, frame_of_index_cols): - df = frame_of_index_cols + def test_set_index_verify_integrity(self): + df = tm.get_frame_of_index_cols() with pytest.raises(ValueError, match='Index has duplicate keys'): df.set_index('A', verify_integrity=True) @@ -238,8 +238,8 @@ def test_set_index_verify_integrity(self, frame_of_index_cols): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_raise(self, frame_of_index_cols, drop, append): - df = frame_of_index_cols + def test_set_index_raise(self, drop, append): + df = tm.get_frame_of_index_cols() with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"): # column names are A-E, as well as one tuple @@ -429,7 +429,9 @@ def test_set_index_empty_column(self): names=['a', 'x']) tm.assert_frame_equal(result, expected) - def test_set_columns(self, float_string_frame): + def test_set_columns(self): + float_string_frame = tm.get_float_string_frame() + cols = Index(np.arange(len(float_string_frame.columns))) float_string_frame.columns = cols with pytest.raises(ValueError, match='Length mismatch'): @@ -461,7 +463,9 @@ def test_dti_set_index_reindex(self): # Renaming - def test_rename(self, float_frame): + def test_rename(self): + float_frame = DataFrame(tm.getSeriesData()) + mapping = { 'A': 'a', 'B': 'b', @@ -508,8 +512,10 @@ def test_rename(self, float_frame): Index(['bar', 'foo'], name='name')) assert renamed.index.name == renamer.index.name - def test_rename_axis_inplace(self, float_frame): + def test_rename_axis_inplace(self): # GH 15704 + float_frame = DataFrame(tm.getSeriesData()) + expected = float_frame.rename_axis('foo') result = float_frame.copy() no_return = result.rename_axis('foo', inplace=True) @@ -664,12 +670,16 @@ def test_rename_multiindex(self): level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self, float_frame): + def test_rename_nocopy(self): + float_frame = DataFrame(tm.getSeriesData()) + renamed = float_frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1. assert (float_frame['C'] == 1.).all() - def test_rename_inplace(self, float_frame): + def test_rename_inplace(self): + float_frame = DataFrame(tm.getSeriesData()) + float_frame.rename(columns={'C': 'foo'}) assert 'C' in float_frame assert 'foo' not in float_frame @@ -748,7 +758,9 @@ def test_reorder_levels(self): result = df.reorder_levels(['L0', 'L0', 'L0']) tm.assert_frame_equal(result, expected) - def test_reset_index(self, float_frame): + def test_reset_index(self): + float_frame = DataFrame(tm.getSeriesData()) + stacked = float_frame.stack()[::2] stacked = DataFrame({'foo': stacked, 'bar': stacked}) @@ -1004,7 +1016,9 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_rename_objects(self, float_string_frame): + def test_rename_objects(self): + float_string_frame = tm.get_float_string_frame() + renamed = float_string_frame.rename(columns=str.upper) assert 'FOO' in renamed @@ -1128,7 +1142,8 @@ def test_rename_positional(self): assert 'rename' in message assert 'Use named arguments' in message - def test_assign_columns(self, float_frame): + def test_assign_columns(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['hi'] = 'there' df = float_frame.copy() diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 9f64b71ea455c..318bf813c443e 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -241,21 +241,27 @@ class TestDataFrameAnalytics(): # Correlation and covariance @td.skip_if_no_scipy - def test_corr_pearson(self, float_frame): + def test_corr_pearson(self): + float_frame = DataFrame(tm.getSeriesData()) + float_frame['A'][:5] = nan float_frame['B'][5:10] = nan self._check_method(float_frame, 'pearson') @td.skip_if_no_scipy - def test_corr_kendall(self, float_frame): + def test_corr_kendall(self): + float_frame = DataFrame(tm.getSeriesData()) + float_frame['A'][:5] = nan float_frame['B'][5:10] = nan self._check_method(float_frame, 'kendall') @td.skip_if_no_scipy - def test_corr_spearman(self, float_frame): + def test_corr_spearman(self): + float_frame = DataFrame(tm.getSeriesData()) + float_frame['A'][:5] = nan float_frame['B'][5:10] = nan @@ -267,7 +273,10 @@ def _check_method(self, frame, method='pearson'): tm.assert_almost_equal(correls['A']['C'], expected) @td.skip_if_no_scipy - def test_corr_non_numeric(self, float_frame, float_string_frame): + def test_corr_non_numeric(self): + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + float_frame['A'][:5] = nan float_frame['B'][5:10] = nan @@ -341,8 +350,11 @@ def test_corr_invalid_method(self): with pytest.raises(ValueError, match=msg): df.corr(method="____") - def test_cov(self, float_frame, float_string_frame): + def test_cov(self): # min_periods no NAs (corner case) + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + expected = float_frame.cov() result = float_frame.cov(min_periods=len(float_frame)) @@ -385,7 +397,9 @@ def test_cov(self, float_frame, float_string_frame): index=df.columns, columns=df.columns) tm.assert_frame_equal(result, expected) - def test_corrwith(self, datetime_frame): + def test_corrwith(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + a = datetime_frame noise = Series(randn(len(a)), index=a.index) @@ -434,7 +448,9 @@ def test_corrwith_with_objects(self): expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) tm.assert_series_equal(result, expected) - def test_corrwith_series(self, datetime_frame): + def test_corrwith_series(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + result = datetime_frame.corrwith(datetime_frame['A']) expected = datetime_frame.apply(datetime_frame['A'].corr) @@ -709,7 +725,12 @@ def test_reduce_mixed_frame(self): np.array([2, 150, 'abcde'], dtype=object)) tm.assert_series_equal(test, df.T.sum(axis=1)) - def test_count(self, float_frame_with_na, float_frame, float_string_frame): + def test_count(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + f = lambda s: notna(s).sum() assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) @@ -740,8 +761,12 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): expected = Series(0, index=[]) tm.assert_series_equal(result, expected) - def test_nunique(self, float_frame_with_na, float_frame, - float_string_frame): + def test_nunique(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + f = lambda s: len(algorithms.unique1d(s.dropna())) assert_stat_op_calc('nunique', f, float_frame_with_na, has_skipna=False, check_dtype=False, @@ -758,8 +783,13 @@ def test_nunique(self, float_frame_with_na, float_frame, tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})) - def test_sum(self, float_frame_with_na, mixed_float_frame, - float_frame, float_string_frame): + def test_sum(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + mixed_float_frame = tm.get_mixed_float_frame() + float_frame = DataFrame(tm.getSeriesData()) + assert_stat_op_api('sum', float_frame, float_string_frame, has_numeric_only=True) assert_stat_op_calc('sum', np.sum, float_frame_with_na, @@ -792,20 +822,33 @@ def test_stat_operators_attempt_obj_array(self, method): if method in ['sum', 'prod']: tm.assert_series_equal(result, expected) - def test_mean(self, float_frame_with_na, float_frame, float_string_frame): + def test_mean(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + assert_stat_op_calc('mean', np.mean, float_frame_with_na, check_dates=True) assert_stat_op_api('mean', float_frame, float_string_frame) - def test_product(self, float_frame_with_na, float_frame, - float_string_frame): + def test_product(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + assert_stat_op_calc('product', np.prod, float_frame_with_na) assert_stat_op_api('product', float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median(self, float_frame_with_na, float_frame, - float_string_frame): + def test_median(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + def wrapper(x): if isna(x).any(): return np.nan @@ -815,8 +858,12 @@ def wrapper(x): check_dates=True) assert_stat_op_api('median', float_frame, float_string_frame) - def test_min(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): + def test_min(self): + int_frame = tm.get_int_frame() + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) assert_stat_op_calc('min', np.min, float_frame_with_na, @@ -824,7 +871,9 @@ def test_min(self, float_frame_with_na, int_frame, assert_stat_op_calc('min', np.min, int_frame) assert_stat_op_api('min', float_frame, float_string_frame) - def test_cummin(self, datetime_frame): + def test_cummin(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = nan datetime_frame.loc[10:15, 1] = nan datetime_frame.loc[15:, 2] = nan @@ -847,7 +896,9 @@ def test_cummin(self, datetime_frame): cummin_xs = datetime_frame.cummin(axis=1) assert np.shape(cummin_xs) == np.shape(datetime_frame) - def test_cummax(self, datetime_frame): + def test_cummax(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = nan datetime_frame.loc[10:15, 1] = nan datetime_frame.loc[15:, 2] = nan @@ -870,8 +921,13 @@ def test_cummax(self, datetime_frame): cummax_xs = datetime_frame.cummax(axis=1) assert np.shape(cummax_xs) == np.shape(datetime_frame) - def test_max(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): + def test_max(self): + + int_frame = tm.get_int_frame() + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) assert_stat_op_calc('max', np.max, float_frame_with_na, @@ -879,13 +935,21 @@ def test_max(self, float_frame_with_na, int_frame, assert_stat_op_calc('max', np.max, int_frame) assert_stat_op_api('max', float_frame, float_string_frame) - def test_mad(self, float_frame_with_na, float_frame, float_string_frame): + def test_mad(self): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + f = lambda x: np.abs(x - x.mean()).mean() assert_stat_op_calc('mad', f, float_frame_with_na) assert_stat_op_api('mad', float_frame, float_string_frame) - def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, - float_string_frame): + def test_var_std(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + alt = lambda x: np.var(x, ddof=1) assert_stat_op_calc('var', alt, float_frame_with_na) assert_stat_op_api('var', float_frame, float_string_frame) @@ -951,7 +1015,9 @@ def test_mixed_ops(self, op): result = getattr(df, op)() assert len(result) == 2 - def test_cumsum(self, datetime_frame): + def test_cumsum(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = nan datetime_frame.loc[10:15, 1] = nan datetime_frame.loc[15:, 2] = nan @@ -974,7 +1040,9 @@ def test_cumsum(self, datetime_frame): cumsum_xs = datetime_frame.cumsum(axis=1) assert np.shape(cumsum_xs) == np.shape(datetime_frame) - def test_cumprod(self, datetime_frame): + def test_cumprod(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = nan datetime_frame.loc[10:15, 1] = nan datetime_frame.loc[15:, 2] = nan @@ -1003,8 +1071,13 @@ def test_cumprod(self, datetime_frame): df.cumprod(0) df.cumprod(1) - def test_sem(self, float_frame_with_na, datetime_frame, - float_frame, float_string_frame): + def test_sem(self): + + datetime_frame = DataFrame(tm.getTimeSeriesData()) + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) assert_stat_op_calc('sem', alt, float_frame_with_na) assert_stat_op_api('sem', float_frame, float_string_frame) @@ -1023,9 +1096,13 @@ def test_sem(self, float_frame_with_na, datetime_frame, assert not (result < 0).any() @td.skip_if_no_scipy - def test_skew(self, float_frame_with_na, float_frame, float_string_frame): + def test_skew(self): from scipy.stats import skew + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + def alt(x): if len(x) < 3: return np.nan @@ -1035,9 +1112,13 @@ def alt(x): assert_stat_op_api('skew', float_frame, float_string_frame) @td.skip_if_no_scipy - def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): + def test_kurt(self): from scipy.stats import kurtosis + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + def alt(x): if len(x) < 4: return np.nan @@ -1206,7 +1287,9 @@ def test_operators_timedelta64(self): assert df['off1'].dtype == 'timedelta64[ns]' assert df['off2'].dtype == 'timedelta64[ns]' - def test_sum_corner(self, empty_frame): + def test_sum_corner(self): + empty_frame = DataFrame({}) + axis0 = empty_frame.sum(0) axis1 = empty_frame.sum(1) assert isinstance(axis0, Series) @@ -1274,21 +1357,28 @@ def test_sum_nanops_timedelta(self): expected = pd.Series([0, 0, np.nan], dtype='m8[ns]', index=idx) tm.assert_series_equal(result, expected) - def test_sum_object(self, float_frame): + def test_sum_object(self): + float_frame = DataFrame(tm.getSeriesData()) + values = float_frame.values.astype(int) frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns) deltas = frame * timedelta(1) deltas.sum() - def test_sum_bool(self, float_frame): + def test_sum_bool(self): # ensure this works, bug report + float_frame = DataFrame(tm.getSeriesData()) + bools = np.isnan(float_frame) bools.sum(1) bools.sum(0) - def test_mean_corner(self, float_frame, float_string_frame): + def test_mean_corner(self): # unit test when have object data + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + the_mean = float_string_frame.mean(axis=0) the_sum = float_string_frame.sum(axis=0, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) @@ -1304,8 +1394,10 @@ def test_mean_corner(self, float_frame, float_string_frame): means = float_frame.mean(0) assert means['bool'] == float_frame['bool'].values.mean() - def test_stats_mixed_type(self, float_string_frame): + def test_stats_mixed_type(self): # don't blow up + float_string_frame = tm.get_float_string_frame() + float_string_frame.std(1) float_string_frame.var(1) float_string_frame.mean(1) @@ -1313,7 +1405,12 @@ def test_stats_mixed_type(self, float_string_frame): # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self, int_frame, float_frame, float_string_frame): + def test_median_corner(self): + + int_frame = tm.get_int_frame() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + def wrapper(x): if isna(x).any(): return np.nan @@ -1325,7 +1422,9 @@ def wrapper(x): # Miscellanea - def test_count_objects(self, float_string_frame): + def test_count_objects(self): + float_string_frame = tm.get_float_string_frame() + dm = DataFrame(float_string_frame._series) df = DataFrame(float_string_frame._series) @@ -1345,7 +1444,11 @@ def test_sum_bools(self): # Index of max / min - def test_idxmin(self, float_frame, int_frame): + def test_idxmin(self): + + int_frame = tm.get_int_frame() + float_frame = DataFrame(tm.getSeriesData()) + frame = float_frame frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan @@ -1359,7 +1462,11 @@ def test_idxmin(self, float_frame, int_frame): pytest.raises(ValueError, frame.idxmin, axis=2) - def test_idxmax(self, float_frame, int_frame): + def test_idxmax(self): + + int_frame = tm.get_int_frame() + float_frame = DataFrame(tm.getSeriesData()) + frame = float_frame frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan @@ -1377,7 +1484,15 @@ def test_idxmax(self, float_frame, int_frame): # Logical reductions @pytest.mark.parametrize('opname', ['any', 'all']) - def test_any_all(self, opname, bool_frame_with_na, float_string_frame): + def test_any_all(self, opname): + float_string_frame = tm.get_float_string_frame() + + df = DataFrame(tm.getSeriesData()) > 0 + bool_frame_with_na = df.astype(object) + # set some NAs + bool_frame_with_na.loc[5:10] = np.nan + bool_frame_with_na.loc[15:20, -2:] = np.nan + assert_bool_op_calc(opname, getattr(np, opname), bool_frame_with_na, has_skipna=True) assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, @@ -1872,7 +1987,9 @@ def test_pct_change(self): tm.assert_frame_equal(result, expected) # Clip - def test_clip(self, float_frame): + def test_clip(self): + float_frame = DataFrame(tm.getSeriesData()) + median = float_frame.median().median() original = float_frame.copy() @@ -1890,8 +2007,10 @@ def test_clip(self, float_frame): # Verify that float_frame was not changed inplace assert (float_frame.values == original.values).all() - def test_inplace_clip(self, float_frame): + def test_inplace_clip(self): # GH 15388 + float_frame = DataFrame(tm.getSeriesData()) + median = float_frame.median().median() frame_copy = float_frame.copy() @@ -1976,9 +2095,9 @@ def test_clip_against_series(self, inplace): (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) ]) - def test_clip_against_list_like(self, simple_frame, - inplace, lower, axis, res): + def test_clip_against_list_like(self, inplace, lower, axis, res): # GH 15390 + simple_frame = tm.get_simple_frame() original = simple_frame.copy(deep=True) result = original.clip(lower=lower, upper=[5, 6, 7], @@ -2023,9 +2142,11 @@ def test_clip_against_unordered_columns(self): tm.assert_frame_equal(result_lower, expected_lower) tm.assert_frame_equal(result_lower_upper, expected_lower_upper) - def test_clip_with_na_args(self, float_frame): + def test_clip_with_na_args(self): """Should process np.nan argument as None """ # GH 17276 + float_frame = DataFrame(tm.getSeriesData()) + tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index c1be64829c303..1cd04ac2c3ee0 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -37,9 +37,12 @@ def _assert_series_equal(self, left, right): """Dispatch to series class dependent assertion""" raise NotImplementedError - def test_copy_index_name_checking(self, float_frame): + def test_copy_index_name_checking(self): # don't want to be able to modify the index stored elsewhere after # making a copy + + float_frame = DataFrame(tm.getSeriesData()) + for attr in ('index', 'columns'): ind = getattr(float_frame, attr) ind.name = None @@ -47,7 +50,9 @@ def test_copy_index_name_checking(self, float_frame): getattr(cp, attr).name = 'foo' assert getattr(float_frame, attr).name is None - def test_getitem_pop_assign_name(self, float_frame): + def test_getitem_pop_assign_name(self): + float_frame = DataFrame(tm.getSeriesData()) + s = float_frame['A'] assert s.name == 'A' @@ -60,7 +65,9 @@ def test_getitem_pop_assign_name(self, float_frame): s2 = s.loc[:] assert s2.name == 'B' - def test_get_value(self, float_frame): + def test_get_value(self): + float_frame = DataFrame(tm.getSeriesData()) + for idx in float_frame.index: for col in float_frame.columns: with tm.assert_produces_warning(FutureWarning, @@ -69,7 +76,9 @@ def test_get_value(self, float_frame): expected = float_frame[col][idx] tm.assert_almost_equal(result, expected) - def test_add_prefix_suffix(self, float_frame): + def test_add_prefix_suffix(self): + float_frame = DataFrame(tm.getSeriesData()) + with_prefix = float_frame.add_prefix('foo#') expected = pd.Index(['foo#%s' % c for c in float_frame.columns]) tm.assert_index_equal(with_prefix.columns, expected) @@ -86,7 +95,9 @@ def test_add_prefix_suffix(self, float_frame): expected = pd.Index(['{}%'.format(c) for c in float_frame.columns]) tm.assert_index_equal(with_pct_suffix.columns, expected) - def test_get_axis(self, float_frame): + def test_get_axis(self): + float_frame = DataFrame(tm.getSeriesData()) + f = float_frame assert f._get_axis_number(0) == 0 assert f._get_axis_number(1) == 1 @@ -115,11 +126,15 @@ def test_get_axis(self, float_frame): with pytest.raises(ValueError, match='No axis named'): f._get_axis_number(None) - def test_keys(self, float_frame): + def test_keys(self): + float_frame = DataFrame(tm.getSeriesData()) + getkeys = float_frame.keys assert getkeys() is float_frame.columns - def test_column_contains_typeerror(self, float_frame): + def test_column_contains_typeerror(self): + float_frame = DataFrame(tm.getSeriesData()) + try: float_frame.columns in float_frame except TypeError: @@ -143,7 +158,8 @@ def test_tab_completion(self): assert key not in dir(df) assert isinstance(df.__getitem__('A'), pd.DataFrame) - def test_not_hashable(self, empty_frame): + def test_not_hashable(self): + empty_frame = DataFrame({}) df = self.klass([1]) pytest.raises(TypeError, hash, df) pytest.raises(TypeError, hash, empty_frame) @@ -154,7 +170,9 @@ def test_new_empty_index(self): df1.index.name = 'foo' assert df2.index.name is None - def test_array_interface(self, float_frame): + def test_array_interface(self): + float_frame = DataFrame(tm.getSeriesData()) + with np.errstate(all='ignore'): result = np.sqrt(float_frame) assert isinstance(result, type(float_frame)) @@ -163,7 +181,9 @@ def test_array_interface(self, float_frame): self._assert_frame_equal(result, float_frame.apply(np.sqrt)) - def test_get_agg_axis(self, float_frame): + def test_get_agg_axis(self): + float_frame = DataFrame(tm.getSeriesData()) + cols = float_frame._get_agg_axis(0) assert cols is float_frame.columns @@ -172,7 +192,12 @@ def test_get_agg_axis(self, float_frame): pytest.raises(ValueError, float_frame._get_agg_axis, 2) - def test_nonzero(self, float_frame, float_string_frame, empty_frame): + def test_nonzero(self): + float_frame = DataFrame(tm.getSeriesData()) + + float_string_frame = tm.get_float_string_frame() + empty_frame = DataFrame({}) + assert empty_frame.empty assert not float_frame.empty @@ -199,10 +224,15 @@ def test_items(self): assert isinstance(v, Series) assert (df[k] == v).all() - def test_iter(self, float_frame): + def test_iter(self): + float_frame = DataFrame(tm.getSeriesData()) + assert tm.equalContents(list(float_frame), float_frame.columns) - def test_iterrows(self, float_frame, float_string_frame): + def test_iterrows(self): + float_frame = DataFrame(tm.getSeriesData()) + float_string_frame = tm.get_float_string_frame() + for k, v in float_frame.iterrows(): exp = float_frame.loc[k] self._assert_series_equal(v, exp) @@ -223,7 +253,9 @@ def test_iterrows_iso8601(self): exp = s.loc[k] self._assert_series_equal(v, exp) - def test_itertuples(self, float_frame): + def test_itertuples(self): + float_frame = DataFrame(tm.getSeriesData()) + for i, tup in enumerate(float_frame.itertuples()): s = self.klass._constructor_sliced(tup[1:]) s.name = tup[0] @@ -286,10 +318,14 @@ def test_sequence_like_with_categorical(self): for c, col in df.iteritems(): str(s) - def test_len(self, float_frame): + def test_len(self): + float_frame = DataFrame(tm.getSeriesData()) assert len(float_frame) == len(float_frame.index) - def test_values(self, float_frame, float_string_frame): + def test_values(self): + float_frame = DataFrame(tm.getSeriesData()) + float_string_frame = tm.get_float_string_frame() + frame = float_frame arr = frame.values @@ -334,7 +370,8 @@ def test_to_numpy_copy(self): assert df.to_numpy(copy=False).base is arr assert df.to_numpy(copy=True).base is None - def test_transpose(self, float_frame): + def test_transpose(self): + float_frame = DataFrame(tm.getSeriesData()) frame = float_frame dft = frame.T for idx, series in compat.iteritems(dft): @@ -359,7 +396,8 @@ def test_swapaxes(self): self._assert_frame_equal(df, df.swapaxes(0, 0)) pytest.raises(ValueError, df.swapaxes, 2, 5) - def test_axis_aliases(self, float_frame): + def test_axis_aliases(self): + float_frame = DataFrame(tm.getSeriesData()) f = float_frame # reg name @@ -377,22 +415,25 @@ def test_class_axis(self): assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) - def test_more_values(self, float_string_frame): + def test_more_values(self): + float_string_frame = tm.get_float_string_frame() values = float_string_frame.values assert values.shape[1] == len(float_string_frame.columns) - def test_repr_with_mi_nat(self, float_string_frame): + def test_repr_with_mi_nat(self): df = self.klass({'X': [1, 2]}, index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) result = repr(df) expected = ' X\nNaT a 1\n2013-01-01 b 2' assert result == expected - def test_iteritems_names(self, float_string_frame): + def test_iteritems_names(self): + float_string_frame = tm.get_float_string_frame() for k, v in compat.iteritems(float_string_frame): assert v.name == k - def test_series_put_names(self, float_string_frame): + def test_series_put_names(self): + float_string_frame = tm.get_float_string_frame() series = float_string_frame._series for k, v in compat.iteritems(series): assert v.name == k @@ -434,26 +475,30 @@ class TestDataFrameMisc(SharedWithSparse): _assert_frame_equal = staticmethod(assert_frame_equal) _assert_series_equal = staticmethod(assert_series_equal) - def test_values(self, float_frame): + def test_values(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame.values[:, 0] = 5. assert (float_frame.values[:, 0] == 5).all() - def test_as_matrix_deprecated(self, float_frame): + def test_as_matrix_deprecated(self): # GH 18458 + float_frame = DataFrame(tm.getSeriesData()) with tm.assert_produces_warning(FutureWarning): cols = float_frame.columns.tolist() result = float_frame.as_matrix(columns=cols) expected = float_frame.values tm.assert_numpy_array_equal(result, expected) - def test_deepcopy(self, float_frame): + def test_deepcopy(self): + float_frame = DataFrame(tm.getSeriesData()) cp = deepcopy(float_frame) series = cp['A'] series[:] = 10 for idx, value in compat.iteritems(series): assert float_frame['A'][idx] != value - def test_transpose_get_view(self, float_frame): + def test_transpose_get_view(self): + float_frame = DataFrame(tm.getSeriesData()) dft = float_frame.T dft.values[:, 5:10] = 5 diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index ade527a16c902..5425e44b15046 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -36,7 +36,9 @@ def int_frame_const_col(): class TestDataFrameApply(): - def test_apply(self, float_frame): + def test_apply(self): + float_frame = DataFrame(tm.getSeriesData()) + with np.errstate(all='ignore'): # ufunc applied = float_frame.apply(np.sqrt) @@ -74,14 +76,17 @@ def test_apply_mixed_datetimelike(self): result = df.apply(lambda x: x, axis=1) assert_frame_equal(result, df) - def test_apply_empty(self, float_frame, empty_frame): + def test_apply_empty(self): # empty + empty_frame = DataFrame({}) + applied = empty_frame.apply(np.sqrt) assert applied.empty applied = empty_frame.apply(np.mean) assert applied.empty + float_frame = DataFrame(tm.getSeriesData()) no_rows = float_frame[:0] result = no_rows.apply(lambda x: x.mean()) expected = Series(np.nan, index=float_frame.columns) @@ -97,8 +102,10 @@ def test_apply_empty(self, float_frame, empty_frame): result = expected.apply(lambda x: x['a'], axis=1) assert_frame_equal(expected, result) - def test_apply_with_reduce_empty(self, empty_frame): + def test_apply_with_reduce_empty(self): # reduce with an empty DataFrame + empty_frame = DataFrame({}) + x = [] result = empty_frame.apply(x.append, axis=1, result_type='expand') assert_frame_equal(result, empty_frame) @@ -116,7 +123,9 @@ def test_apply_with_reduce_empty(self, empty_frame): # Ensure that x.append hasn't been called assert x == [] - def test_apply_deprecate_reduce(self, empty_frame): + def test_apply_deprecate_reduce(self): + empty_frame = DataFrame({}) + x = [] with tm.assert_produces_warning(FutureWarning): empty_frame.apply(x.append, axis=1, reduce=True) @@ -140,16 +149,21 @@ def test_apply_standard_nonunique(self): pytest.param([], {'numeric_only': True}, id='optional_kwds'), pytest.param([1, None], {'numeric_only': True}, id='args_and_kwds') ]) - def test_apply_with_string_funcs(self, float_frame, func, args, kwds): + def test_apply_with_string_funcs(self, func, args, kwds): + float_frame = DataFrame(tm.getSeriesData()) + result = float_frame.apply(func, *args, **kwds) expected = getattr(float_frame, func)(*args, **kwds) tm.assert_series_equal(result, expected) - def test_apply_broadcast_deprecated(self, float_frame): + def test_apply_broadcast_deprecated(self): + float_frame = DataFrame(tm.getSeriesData()) + with tm.assert_produces_warning(FutureWarning): float_frame.apply(np.mean, broadcast=True) - def test_apply_broadcast(self, float_frame, int_frame_const_col): + def test_apply_broadcast(self, int_frame_const_col): + float_frame = DataFrame(tm.getSeriesData()) # scalars result = float_frame.apply(np.mean, result_type='broadcast') @@ -208,7 +222,9 @@ def test_apply_broadcast_error(self, int_frame_const_col): with pytest.raises(ValueError): df.apply(lambda x: Series([1, 2]), axis=1, result_type='broadcast') - def test_apply_raw(self, float_frame): + def test_apply_raw(self): + float_frame = DataFrame(tm.getSeriesData()) + result0 = float_frame.apply(np.mean, raw=True) result1 = float_frame.apply(np.mean, axis=1, raw=True) @@ -223,12 +239,16 @@ def test_apply_raw(self, float_frame): expected = float_frame * 2 assert_frame_equal(result, expected) - def test_apply_axis1(self, float_frame): + def test_apply_axis1(self): + float_frame = DataFrame(tm.getSeriesData()) + d = float_frame.index[0] tapplied = float_frame.apply(np.mean, axis=1) assert tapplied[d] == np.mean(float_frame.xs(d)) - def test_apply_ignore_failures(self, float_string_frame): + def test_apply_ignore_failures(self): + float_string_frame = tm.get_float_string_frame() + result = frame_apply(float_string_frame, np.mean, 0, ignore_failures=True).apply_standard() expected = float_string_frame._get_numeric_data().apply(np.mean) @@ -286,7 +306,9 @@ def _checkit(axis=0, raw=False): result = no_cols.apply(lambda x: x.mean(), result_type='broadcast') assert isinstance(result, DataFrame) - def test_apply_with_args_kwds(self, float_frame): + def test_apply_with_args_kwds(self): + float_frame = DataFrame(tm.getSeriesData()) + def add_some(x, howmuch=0): return x + howmuch @@ -308,11 +330,15 @@ def subtract_and_divide(x, sub, divide=1): expected = float_frame.apply(lambda x: (x - 2.) / 2.) assert_frame_equal(result, expected) - def test_apply_yield_list(self, float_frame): + def test_apply_yield_list(self): + float_frame = DataFrame(tm.getSeriesData()) + result = float_frame.apply(list) assert_frame_equal(result, float_frame) - def test_apply_reduce_Series(self, float_frame): + def test_apply_reduce_Series(self): + float_frame = DataFrame(tm.getSeriesData()) + float_frame.loc[::2, 'A'] = np.nan expected = float_frame.mean(1) result = float_frame.apply(np.mean, axis=1) @@ -406,7 +432,9 @@ def test_apply_convert_objects(self): result = data.apply(lambda x: x, axis=1) assert_frame_equal(result._convert(datetime=True), data) - def test_apply_attach_name(self, float_frame): + def test_apply_attach_name(self): + float_frame = DataFrame(tm.getSeriesData()) + result = float_frame.apply(lambda x: x.name) expected = Series(float_frame.columns, index=float_frame.columns) assert_series_equal(result, expected) @@ -430,7 +458,8 @@ def test_apply_attach_name(self, float_frame): expected.index = float_frame.index assert_series_equal(result, expected) - def test_apply_multi_index(self, float_frame): + def test_apply_multi_index(self): + index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']]) s = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, @@ -461,7 +490,9 @@ def test_apply_dict(self): assert_frame_equal(reduce_false, df) assert_series_equal(reduce_none, dicts) - def test_applymap(self, float_frame): + def test_applymap(self): + float_frame = DataFrame(tm.getSeriesData()) + applied = float_frame.applymap(lambda x: x * 2) tm.assert_frame_equal(applied, float_frame * 2) float_frame.applymap(type) @@ -823,7 +854,9 @@ def zip_frames(frames, axis=1): class TestDataFrameAggregate(): - def test_agg_transform(self, axis, float_frame): + def test_agg_transform(self, axis): + float_frame = DataFrame(tm.getSeriesData()) + other_axis = 1 if axis in {0, 'index'} else 0 with np.errstate(all='ignore'): @@ -872,7 +905,9 @@ def test_agg_transform(self, axis, float_frame): result = float_frame.transform([np.abs, 'sqrt'], axis=axis) assert_frame_equal(result, expected) - def test_transform_and_agg_err(self, axis, float_frame): + def test_transform_and_agg_err(self, axis): + float_frame = DataFrame(tm.getSeriesData()) + # cannot both transform and agg with pytest.raises(ValueError): float_frame.transform(['max', 'min'], axis=axis) @@ -952,7 +987,9 @@ def test_agg_dict_nested_renaming_depr(self): df.agg({'A': {'foo': 'min'}, 'B': {'bar': 'max'}}) - def test_agg_reduce(self, axis, float_frame): + def test_agg_reduce(self, axis): + float_frame = DataFrame(tm.getSeriesData()) + other_axis = 1 if axis in {0, 'index'} else 0 name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f14ecae448723..10493c816ecac 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -65,11 +65,11 @@ def check(df, df2): def test_timestamp_compare(self): # make sure we can compare Timestamps on the right AND left hand side # GH#4982 - df = pd. DataFrame({'dates1': pd.date_range('20010101', periods=10), - 'dates2': pd.date_range('20010102', periods=10), - 'intcol': np.random.randint(1000000000, size=10), - 'floatcol': np.random.randn(10), - 'stringcol': list(tm.rands(10))}) + df = pd.DataFrame({'dates1': pd.date_range('20010101', periods=10), + 'dates2': pd.date_range('20010102', periods=10), + 'intcol': np.random.randint(1000000000, size=10), + 'floatcol': np.random.randn(10), + 'stringcol': list(tm.rands(10))}) df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} @@ -322,11 +322,13 @@ def test_df_add_flex_filled_mixed_dtypes(self): 'B': ser * 2}) tm.assert_frame_equal(result, expected) - def test_arith_flex_frame(self, all_arithmetic_operators, float_frame, - mixed_float_frame): + def test_arith_flex_frame(self, all_arithmetic_operators): # one instance of parametrized fixture op = all_arithmetic_operators + mixed_float_frame = tm.get_mixed_float_frame() + float_frame = pd.DataFrame(tm.getSeriesData()) + def f(x, y): # r-versions not in operator-stdlib; get op without "r" and invert if op.startswith('__r'): @@ -344,8 +346,12 @@ def f(x, y): _check_mixed_float(result, dtype=dict(C=None)) @pytest.mark.parametrize('op', ['__add__', '__sub__', '__mul__']) - def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame, - mixed_float_frame): + def test_arith_flex_frame_mixed(self, op): + + int_frame = tm.get_int_frame() + mixed_int_frame = tm.get_mixed_int_frame() + mixed_float_frame = tm.get_mixed_float_frame() + f = getattr(operator, op) # vs mix int @@ -372,11 +378,12 @@ def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame, expected = f(int_frame, 2 * int_frame) tm.assert_frame_equal(result, expected) - def test_arith_flex_frame_raise(self, all_arithmetic_operators, - float_frame): + def test_arith_flex_frame_raise(self, all_arithmetic_operators): # one instance of parametrized fixture op = all_arithmetic_operators + float_frame = pd.DataFrame(tm.getSeriesData()) + # Check that arrays with dim >= 3 raise for dim in range(3, 6): arr = np.ones((1,) * dim) @@ -384,7 +391,8 @@ def test_arith_flex_frame_raise(self, all_arithmetic_operators, with pytest.raises(ValueError, match=msg): getattr(float_frame, op)(arr) - def test_arith_flex_frame_corner(self, float_frame): + def test_arith_flex_frame_corner(self): + float_frame = pd.DataFrame(tm.getSeriesData()) const_add = float_frame.add(1) tm.assert_frame_equal(const_add, float_frame + 1) @@ -402,8 +410,8 @@ def test_arith_flex_frame_corner(self, float_frame): with pytest.raises(NotImplementedError, match='fill_value'): float_frame.add(float_frame.iloc[0], axis='index', fill_value=3) - def test_arith_flex_series(self, simple_frame): - df = simple_frame + def test_arith_flex_series(self): + df = tm.get_simple_frame() row = df.xs('a') col = df['two'] diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 67f27948343f7..1b35ab5301094 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -43,7 +43,8 @@ def test_setitem_invalidates_datetime_index_freq(self): assert dti.freq == 'D' assert dti[1] == ts - def test_cast_internals(self, float_frame): + def test_cast_internals(self): + float_frame = DataFrame(tm.getSeriesData()) casted = DataFrame(float_frame._data, dtype=int) expected = DataFrame(float_frame._series, dtype=int) assert_frame_equal(casted, expected) @@ -52,7 +53,8 @@ def test_cast_internals(self, float_frame): expected = DataFrame(float_frame._series, dtype=np.int32) assert_frame_equal(casted, expected) - def test_consolidate(self, float_frame): + def test_consolidate(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['E'] = 7. consolidated = float_frame._consolidate() assert len(consolidated._data.blocks) == 1 @@ -68,20 +70,23 @@ def test_consolidate(self, float_frame): float_frame._consolidate(inplace=True) assert len(float_frame._data.blocks) == 1 - def test_consolidate_inplace(self, float_frame): + def test_consolidate_inplace(self): + float_frame = DataFrame(tm.getSeriesData()) frame = float_frame.copy() # noqa # triggers in-place consolidation for letter in range(ord('A'), ord('Z')): float_frame[chr(letter)] = chr(letter) - def test_values_consolidate(self, float_frame): + def test_values_consolidate(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['E'] = 7. assert not float_frame._data.is_consolidated() _ = float_frame.values # noqa assert float_frame._data.is_consolidated() - def test_modify_values(self, float_frame): + def test_modify_values(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame.values[5] = 5 assert (float_frame.values[5] == 5).all() @@ -90,7 +95,8 @@ def test_modify_values(self, float_frame): float_frame.values[6] = 6 assert (float_frame.values[6] == 6).all() - def test_boolean_set_uncons(self, float_frame): + def test_boolean_set_uncons(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['E'] = 7. expected = float_frame.values.copy() @@ -99,13 +105,15 @@ def test_boolean_set_uncons(self, float_frame): float_frame[float_frame > 1] = 2 assert_almost_equal(expected, float_frame.values) - def test_values_numeric_cols(self, float_frame): + def test_values_numeric_cols(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['foo'] = 'bar' values = float_frame[['A', 'B', 'C', 'D']].values assert values.dtype == np.float64 - def test_values_lcd(self, mixed_float_frame, mixed_int_frame): + def test_values_lcd(self): + mixed_float_frame = tm.get_mixed_float_frame() # mixed lcd values = mixed_float_frame[['A', 'B', 'C', 'D']].values @@ -119,6 +127,8 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame): # GH 10364 # B uint64 forces float because there are other signed int types + mixed_int_frame = tm.get_mixed_int_frame() + values = mixed_int_frame[['A', 'B', 'C', 'D']].values assert values.dtype == np.float64 @@ -212,9 +222,11 @@ def test_constructor_with_convert(self): None], np.object_), name='A') assert_series_equal(result, expected) - def test_construction_with_mixed(self, float_string_frame): + def test_construction_with_mixed(self): # test construction edge cases with mixed types + float_string_frame = tm.get_float_string_frame() + # f7u12, this does not work without extensive workaround data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], [datetime(2000, 1, 2), datetime(2000, 1, 3), @@ -303,8 +315,9 @@ def test_equals_different_blocks(self): assert df0.equals(df1) assert df1.equals(df0) - def test_copy_blocks(self, float_frame): + def test_copy_blocks(self): # API/ENH 9607 + float_frame = DataFrame(tm.getSeriesData()) df = DataFrame(float_frame, copy=True) column = df.columns[0] @@ -321,8 +334,9 @@ def test_copy_blocks(self, float_frame): # make sure we did not change the original DataFrame assert not _df[column].equals(df[column]) - def test_no_copy_blocks(self, float_frame): + def test_no_copy_blocks(self): # API/ENH 9607 + float_frame = DataFrame(tm.getSeriesData()) df = DataFrame(float_frame, copy=True) column = df.columns[0] @@ -339,7 +353,10 @@ def test_no_copy_blocks(self, float_frame): # make sure we did change the original DataFrame assert _df[column].equals(df[column]) - def test_copy(self, float_frame, float_string_frame): + def test_copy(self): + float_frame = DataFrame(tm.getSeriesData()) + float_string_frame = tm.get_float_string_frame() + cop = float_frame.copy() cop['E'] = cop['A'] assert 'E' not in float_frame @@ -348,7 +365,10 @@ def test_copy(self, float_frame, float_string_frame): copy = float_string_frame.copy() assert copy._data is not float_string_frame._data - def test_pickle(self, float_string_frame, empty_frame, timezone_frame): + def test_pickle(self): + empty_frame = DataFrame({}) + float_string_frame = tm.get_float_string_frame() + unpickled = tm.round_trip_pickle(float_string_frame) assert_frame_equal(float_string_frame, unpickled) @@ -360,6 +380,7 @@ def test_pickle(self, float_string_frame, empty_frame, timezone_frame): repr(unpickled) # tz frame + timezone_frame = tm.get_timezone_frame() unpickled = tm.round_trip_pickle(timezone_frame) assert_frame_equal(timezone_frame, unpickled) @@ -395,7 +416,10 @@ def test_consolidate_datetime64(self): df.starting), ser_starting.index) tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index) - def test_is_mixed_type(self, float_frame, float_string_frame): + def test_is_mixed_type(self): + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + assert not float_frame._is_mixed_type assert float_string_frame._is_mixed_type @@ -455,7 +479,9 @@ def test_get_numeric_data_extension_dtype(self): expected = df.loc[:, ['A', 'C']] assert_frame_equal(result, expected) - def test_convert_objects(self, float_string_frame): + def test_convert_objects(self): + + float_string_frame = tm.get_float_string_frame() oops = float_string_frame.T.T converted = oops._convert(datetime=True) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 2df43cd678764..08317b5b15bde 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -33,7 +33,8 @@ import pandas as pd from pandas import ( Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Index, - IntervalIndex, MultiIndex, Panel, RangeIndex, Series, bdate_range) + IntervalIndex, MultiIndex, NaT, Panel, RangeIndex, Series, bdate_range, + date_range) from pandas.core.algorithms import take_1d from pandas.core.arrays import ( DatetimeArray, ExtensionArray, IntervalArray, PeriodArray, TimedeltaArray, @@ -3060,3 +3061,117 @@ def convert_rows_list_to_csv_str(rows_list): sep = os.linesep expected = sep.join(rows_list) + sep return expected + + +# ----------------------------------------------------------------------------- +# Fixture-Like Singletons + +def get_simple_frame(): + """ + Fixture for simple 3x3 DataFrame + + Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. + """ + arr = np.array([[1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.]]) + + return DataFrame(arr, columns=['one', 'two', 'three'], + index=['a', 'b', 'c']) + + +def get_int_frame(): + """ + Fixture for DataFrame of ints with index of unique strings + + Columns are ['A', 'B', 'C', 'D'] + """ + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(getSeriesData())}) + # force these all to int64 to avoid platform testing issues + return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64) + + +def get_mixed_int_frame(): + """ + Fixture for DataFrame of different int types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(getSeriesData())}) + df.A = df.A.astype('int32') + df.B = np.ones(len(df.B), dtype='uint64') + df.C = df.C.astype('uint8') + df.D = df.C.astype('int64') + return df + + +def get_float_frame_with_na(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + df = DataFrame(getSeriesData()) + # set some NAs + df.loc[5:10] = np.nan + df.loc[15:20, -2:] = np.nan + return df + + +def get_float_string_frame(): + """ + Fixture for DataFrame of floats and strings with index of unique strings + + Columns are ['A', 'B', 'C', 'D', 'foo']. + """ + df = DataFrame(getSeriesData()) + df['foo'] = 'bar' + return df + + +def get_mixed_float_frame(): + """ + Fixture for DataFrame of different float types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + df = DataFrame(getSeriesData()) + df.A = df.A.astype('float32') + df.B = df.B.astype('float32') + df.C = df.C.astype('float16') + df.D = df.D.astype('float64') + return df + + +def get_timezone_frame(): + """ + Fixture for DataFrame of date_range Series with different time zones + + Columns are ['A', 'B', 'C']; some entries are missing + """ + df = DataFrame({'A': date_range('20130101', periods=3), + 'B': date_range('20130101', periods=3, + tz='US/Eastern'), + 'C': date_range('20130101', periods=3, + tz='CET')}) + df.iloc[1, 1] = NaT + df.iloc[1, 2] = NaT + return df + + +def get_frame_of_index_cols(): + """ + Fixture for DataFrame of columns that can be used for indexing + + Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; + 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. + """ + df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], + 'B': ['one', 'two', 'three', 'one', 'two'], + 'C': ['a', 'b', 'c', 'd', 'e'], + 'D': np.random.randn(5), + 'E': np.random.randn(5), + ('tuple', 'as', 'label'): np.random.randn(5)}) + return df From 032a74eab1d7894d9c7306859cd5046d6b8726aa Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 14 Jan 2019 14:49:54 -0800 Subject: [PATCH 2/8] troubleshoot --- pandas/tests/frame/test_alter_axes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index ab633e022540e..296c0fb8b37f5 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -684,8 +684,10 @@ def test_rename_inplace(self): assert 'C' in float_frame assert 'foo' not in float_frame + fid = id(float_frame) c_id = id(float_frame['C']) float_frame = float_frame.copy() + assert fid != id(float_frame) float_frame.rename(columns={'C': 'foo'}, inplace=True) assert 'C' not in float_frame From 37a03ad6833326743cee3a39fde17e7a4aac26f3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 14 Jan 2019 18:39:56 -0800 Subject: [PATCH 3/8] troubleshoot --- pandas/tests/frame/test_alter_axes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 296c0fb8b37f5..c6faefe5b0470 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -19,6 +19,9 @@ import pandas.util.testing as tm +ff = tm.get_float_frame() + + class TestDataFrameAlterAxes(): def test_set_index_directly(self): @@ -678,16 +681,14 @@ def test_rename_nocopy(self): assert (float_frame['C'] == 1.).all() def test_rename_inplace(self): - float_frame = DataFrame(tm.getSeriesData()) + float_frame = ff # get from outside local scope float_frame.rename(columns={'C': 'foo'}) assert 'C' in float_frame assert 'foo' not in float_frame - fid = id(float_frame) c_id = id(float_frame['C']) float_frame = float_frame.copy() - assert fid != id(float_frame) float_frame.rename(columns={'C': 'foo'}, inplace=True) assert 'C' not in float_frame From f17435b688cec0218bee9e61f532376186f8289b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 16 Jan 2019 18:49:35 -0800 Subject: [PATCH 4/8] fixture for broken case --- pandas/tests/frame/test_alter_axes.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index c6faefe5b0470..2eb1dc750e20d 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -19,7 +19,12 @@ import pandas.util.testing as tm -ff = tm.get_float_frame() +@pytest.fixture +def float_frame(): + # GH#24769 + # Because for some reason a test behaves differently depending on + # whether it uses a fixture or not + return tm.get_float_frame() class TestDataFrameAlterAxes(): @@ -680,8 +685,7 @@ def test_rename_nocopy(self): renamed['foo'] = 1. assert (float_frame['C'] == 1.).all() - def test_rename_inplace(self): - float_frame = ff # get from outside local scope + def test_rename_inplace(self, float_frame): float_frame.rename(columns={'C': 'foo'}) assert 'C' in float_frame From 689c203a22848db584461c3c5556407dbc6400ae Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 17 Jan 2019 14:20:46 -0800 Subject: [PATCH 5/8] update kludge --- pandas/tests/frame/test_alter_axes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 2eb1dc750e20d..2a9ac12b3761a 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -24,7 +24,7 @@ def float_frame(): # GH#24769 # Because for some reason a test behaves differently depending on # whether it uses a fixture or not - return tm.get_float_frame() + return DataFrame(tm.getSeriesData()) class TestDataFrameAlterAxes(): From fde7cd88b23a4711398e4708fac29a95984fe16f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 20 Jan 2019 10:28:14 -0800 Subject: [PATCH 6/8] See if removing duplicate fixture is OK now... --- pandas/tests/frame/test_alter_axes.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index cf37546e7c4f8..bf3dd0c466fd4 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -19,14 +19,6 @@ import pandas.util.testing as tm -@pytest.fixture -def float_frame(): - # GH#24769 - # Because for some reason a test behaves differently depending on - # whether it uses a fixture or not - return DataFrame(tm.getSeriesData()) - - class TestDataFrameAlterAxes(): def test_set_index_directly(self): @@ -696,7 +688,8 @@ def test_rename_nocopy(self): renamed['foo'] = 1. assert (float_frame['C'] == 1.).all() - def test_rename_inplace(self, float_frame): + def test_rename_inplace(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame.rename(columns={'C': 'foo'}) assert 'C' in float_frame From fab45b00a361b69a8ef1a193262f04ced4524e60 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 23 Jan 2019 17:39:32 -0800 Subject: [PATCH 7/8] rebase fixup --- pandas/tests/frame/test_alter_axes.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index bf3dd0c466fd4..d0082b59a2545 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -256,9 +256,8 @@ def test_set_index_raise(self, drop, append): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) @pytest.mark.parametrize('box', [set, iter]) - def test_set_index_raise_on_type(self, frame_of_index_cols, box, - drop, append): - df = frame_of_index_cols + def test_set_index_raise_on_type(self, box, drop, append): + df = tm.get_frame_of_index_cols() msg = 'The parameter "keys" may be a column key, .*' # forbidden type, e.g. set/tuple/iter From 3b87f34da45b07271b4f0b00388b10003616f7d3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 24 Jan 2019 10:47:27 -0800 Subject: [PATCH 8/8] retain reference --- pandas/tests/frame/test_alter_axes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index d0082b59a2545..99c4d7b982ebc 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -688,7 +688,9 @@ def test_rename_nocopy(self): assert (float_frame['C'] == 1.).all() def test_rename_inplace(self): + # See GH#24769 re dereferencing semantics float_frame = DataFrame(tm.getSeriesData()) + float_frame_orig = float_frame float_frame.rename(columns={'C': 'foo'}) assert 'C' in float_frame @@ -701,6 +703,7 @@ def test_rename_inplace(self): assert 'C' not in float_frame assert 'foo' in float_frame assert id(float_frame['foo']) != c_id + assert float_frame is not float_frame_orig def test_rename_bug(self): # GH 5344