diff --git a/pandas/conftest.py b/pandas/conftest.py
index c325a268fe418..4a3fb5c2916c6 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1838,16 +1838,6 @@ def ip():
     return InteractiveShell(config=c)


-@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"])
-def spmatrix(request):
-    """
-    Yields scipy sparse matrix classes.
-    """
-    sparse = pytest.importorskip("scipy.sparse")
-
-    return getattr(sparse, request.param + "_matrix")
-
-
 @pytest.fixture(
     params=[
         getattr(pd.offsets, o)
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
index ebcd7cbd963d7..121bfb78fe5c8 100644
--- a/pandas/tests/arithmetic/test_numeric.py
+++ b/pandas/tests/arithmetic/test_numeric.py
@@ -37,14 +37,6 @@ def switch_numexpr_min_elements(request, monkeypatch):
         yield request.param


-@pytest.fixture(params=[Index, Series, tm.to_array])
-def box_pandas_1d_array(request):
-    """
-    Fixture to test behavior for Index, Series and tm.to_array classes
-    """
-    return request.param
-
-
 @pytest.fixture(
     params=[
         # TODO: add more dtypes here
@@ -62,17 +54,6 @@ def numeric_idx(request):
     return request.param


-@pytest.fixture(
-    params=[Index, Series, tm.to_array, np.array, list], ids=lambda x: x.__name__
-)
-def box_1d_array(request):
-    """
-    Fixture to test behavior for Index, Series, tm.to_array, numpy Array and list
-    classes
-    """
-    return request.param
-
-
 def adjust_negative_zero(zero, expected):
     """
     Helper to adjust the expected result if we are dividing by -0.0
@@ -1499,6 +1480,8 @@ def test_dataframe_div_silenced():
     "data, expected_data",
     [([0, 1, 2], [0, 2, 4])],
 )
+@pytest.mark.parametrize("box_pandas_1d_array", [Index, Series, tm.to_array])
+@pytest.mark.parametrize("box_1d_array", [Index, Series, tm.to_array, np.array, list])
 def test_integer_array_add_list_like(
     box_pandas_1d_array, box_1d_array, data, expected_data
 ):
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 3ec6d70494902..e2ef83c243957 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -1977,9 +1977,12 @@ def test_nan_to_nat_conversions():


 @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+@pytest.mark.parametrize("spmatrix", ["bsr", "coo", "csc", "csr", "dia", "dok", "lil"])
 def test_is_scipy_sparse(spmatrix):
-    pytest.importorskip("scipy")
-    assert is_scipy_sparse(spmatrix([[0, 1]]))
+    sparse = pytest.importorskip("scipy.sparse")
+
+    klass = getattr(sparse, spmatrix + "_matrix")
+    assert is_scipy_sparse(klass([[0, 1]]))

     assert not is_scipy_sparse(np.array([1]))
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index e3a467e8bf65b..1b83c048411a8 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1662,25 +1662,6 @@ def orig(self):
         orig = DataFrame({"cats": cats, "values": values}, index=idx)
         return orig

-    @pytest.fixture
-    def exp_single_row(self):
-        # The expected values if we change a single row
-        cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
-        idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
-        values1 = [1, 1, 2, 1, 1, 1, 1]
-        exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1)
-        return exp_single_row
-
-    @pytest.fixture
-    def exp_multi_row(self):
-        # assign multiple rows (mixed values) (-> array) -> exp_multi_row
-        # changed multiple rows
-        cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
-        idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
-        values2 = [1, 1, 2, 2, 1, 1, 1]
-        exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2)
-        return exp_multi_row
-
     @pytest.fixture
     def exp_parts_cats_col(self):
         # changed part of the cats column
@@ -1702,7 +1683,7 @@ def exp_single_cats_value(self):
         return exp_single_cats_value

     @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
-    def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer):
+    def test_loc_iloc_setitem_list_of_lists(self, orig, indexer):
         # - assign multiple rows (mixed values) -> exp_multi_row
         df = orig.copy()

@@ -1711,6 +1692,11 @@ def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer):

         key = slice("j", "k")
         indexer(df)[key, :] = [["b", 2], ["b", 2]]
+
+        cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
+        idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
+        values2 = [1, 1, 2, 2, 1, 1, 1]
+        exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2)
         tm.assert_frame_equal(df, exp_multi_row)

         df = orig.copy()
@@ -1752,9 +1738,7 @@ def test_loc_iloc_setitem_mask_single_value_in_categories(
         tm.assert_frame_equal(df, exp_single_cats_value)

     @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
-    def test_loc_iloc_setitem_full_row_non_categorical_rhs(
-        self, orig, exp_single_row, indexer
-    ):
+    def test_loc_iloc_setitem_full_row_non_categorical_rhs(self, orig, indexer):
         # - assign a complete row (mixed values) -> exp_single_row
         df = orig.copy()

@@ -1764,6 +1748,10 @@ def test_loc_iloc_setitem_full_row_non_categorical_rhs(

         # not categorical dtype, but "b" _is_ among the categories for df["cat"]
         indexer(df)[key, :] = ["b", 2]
+        cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
+        idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
+        values1 = [1, 1, 2, 1, 1, 1, 1]
+        exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1)
         tm.assert_frame_equal(df, exp_single_row)

         # "c" is not among the categories for df["cat"]
diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
index 02f0b9e20871d..82802dd6e99eb 100644
--- a/pandas/tests/frame/methods/test_join.py
+++ b/pandas/tests/frame/methods/test_join.py
@@ -17,25 +17,6 @@
 from pandas.core.reshape.concat import concat


-@pytest.fixture
-def frame_with_period_index():
-    return DataFrame(
-        data=np.arange(20).reshape(4, 5),
-        columns=list("abcde"),
-        index=period_range(start="2000", freq="Y", periods=4),
-    )
-
-
-@pytest.fixture
-def left():
-    return DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0])
-
-
-@pytest.fixture
-def right():
-    return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2])
-
-
 @pytest.fixture
 def left_no_dup():
     return DataFrame(
@@ -112,7 +93,9 @@ def right_w_dups(right_no_dup):
         ),
     ],
 )
-def test_join(left, right, how, sort, expected):
+def test_join(how, sort, expected):
+    left = DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0])
+    right = DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2])
     result = left.join(right, how=how, sort=sort, validate="1:1")
     tm.assert_frame_equal(result, expected)

@@ -347,7 +330,12 @@ def test_join_overlap(float_frame):
     tm.assert_frame_equal(joined, expected.loc[:, joined.columns])


-def test_join_period_index(frame_with_period_index):
+def test_join_period_index():
+    frame_with_period_index = DataFrame(
+        data=np.arange(20).reshape(4, 5),
+        columns=list("abcde"),
+        index=period_range(start="2000", freq="Y", periods=4),
+    )
     other = frame_with_period_index.rename(columns=lambda key: f"{key}{key}")

     joined_values = np.concatenate([frame_with_period_index.values] * 2, axis=1)
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index 3ba893501914a..8a7b985c98069 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -12,25 +12,6 @@
 from pandas.util.version import Version


-@pytest.fixture
-def df_duplicates():
-    return pd.DataFrame(
-        {"a": [1, 2, 3, 4, 4], "b": [1, 1, 1, 1, 1], "c": [0, 1, 2, 5, 4]},
-        index=[0, 0, 1, 1, 1],
-    )
-
-
-@pytest.fixture
-def df_strings():
-    return pd.DataFrame(
-        {
-            "a": np.random.default_rng(2).permutation(10),
-            "b": list(ascii_lowercase[:10]),
-            "c": np.random.default_rng(2).permutation(10).astype("float64"),
-        }
-    )
-
-
 @pytest.fixture
 def df_main_dtypes():
     return pd.DataFrame(
@@ -81,9 +62,15 @@ class TestNLargestNSmallest:
         ],
     )
     @pytest.mark.parametrize("n", range(1, 11))
-    def test_nlargest_n(self, df_strings, nselect_method, n, order):
+    def test_nlargest_n(self, nselect_method, n, order):
         # GH#10393
-        df = df_strings
+        df = pd.DataFrame(
+            {
+                "a": np.random.default_rng(2).permutation(10),
+                "b": list(ascii_lowercase[:10]),
+                "c": np.random.default_rng(2).permutation(10).astype("float64"),
+            }
+        )
         if "b" in order:
             error_msg = (
                 f"Column 'b' has dtype (object|string), "
@@ -156,10 +143,13 @@ def test_nlargest_n_identical_values(self):
         [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]],
     )
     @pytest.mark.parametrize("n", range(1, 6))
-    def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request):
+    def test_nlargest_n_duplicate_index(self, n, order, request):
         # GH#13412
-        df = df_duplicates
+        df = pd.DataFrame(
+            {"a": [1, 2, 3, 4, 4], "b": [1, 1, 1, 1, 1], "c": [0, 1, 2, 5, 4]},
+            index=[0, 0, 1, 1, 1],
+        )
         result = df.nsmallest(n, order)
         expected = df.sort_values(order).head(n)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py
index ef78ae62cb4d6..91f5de8e7d7f3 100644
--- a/pandas/tests/frame/test_subclass.py
+++ b/pandas/tests/frame/test_subclass.py
@@ -15,16 +15,6 @@
 )


-@pytest.fixture()
-def gpd_style_subclass_df():
-    class SubclassedDataFrame(DataFrame):
-        @property
-        def _constructor(self):
-            return SubclassedDataFrame
-
-    return SubclassedDataFrame({"a": [1, 2, 3]})
-
-
 class TestDataFrameSubclassing:
     def test_frame_subclassing_and_slicing(self):
         # Subclass frame and ensure it returns the right class on slicing it
@@ -710,14 +700,21 @@ def test_idxmax_preserves_subclass(self):
         result = df.idxmax()
         assert isinstance(result, tm.SubclassedSeries)

-    def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df):
+    def test_convert_dtypes_preserves_subclass(self):
         # GH 43668
         df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
         result = df.convert_dtypes()
         assert isinstance(result, tm.SubclassedDataFrame)

-        result = gpd_style_subclass_df.convert_dtypes()
-        assert isinstance(result, type(gpd_style_subclass_df))
+    def test_convert_dtypes_preserves_subclass_with_constructor(self):
+        class SubclassedDataFrame(DataFrame):
+            @property
+            def _constructor(self):
+                return SubclassedDataFrame
+
+        df = SubclassedDataFrame({"a": [1, 2, 3]})
+        result = df.convert_dtypes()
+        assert isinstance(result, SubclassedDataFrame)

     def test_astype_preserves_subclass(self):
         # GH#40810
diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py
index e99e0a6863848..fdeecba29a617 100644
--- a/pandas/tests/frame/test_validate.py
+++ b/pandas/tests/frame/test_validate.py
@@ -3,11 +3,6 @@
 from pandas.core.frame import DataFrame


-@pytest.fixture
-def dataframe():
-    return DataFrame({"a": [1, 2], "b": [3, 4]})
-
-
 class TestDataFrameValidate:
     """Tests for error handling related to data types of method arguments."""

@@ -24,7 +19,8 @@ class TestDataFrameValidate:
         ],
     )
     @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0])
-    def test_validate_bool_args(self, dataframe, func, inplace):
+    def test_validate_bool_args(self, func, inplace):
+        dataframe = DataFrame({"a": [1, 2], "b": [3, 4]})
         msg = 'For argument "inplace" expected type bool'
         kwargs = {"inplace": inplace}

diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py
index 866e9e203ffe3..f25e7d4ab8c79 100644
--- a/pandas/tests/generic/test_finalize.py
+++ b/pandas/tests/generic/test_finalize.py
@@ -386,18 +386,11 @@ def idfn(x):
     return str(x)


-@pytest.fixture(params=_all_methods, ids=lambda x: idfn(x[-1]))
-def ndframe_method(request):
-    """
-    An NDFrame method returning an NDFrame.
-    """
-    return request.param
-
-
 @pytest.mark.filterwarnings(
     "ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning",
     "ignore:last is deprecated:FutureWarning",
 )
+@pytest.mark.parametrize("ndframe_method", _all_methods, ids=lambda x: idfn(x[-1]))
 def test_finalize_called(ndframe_method):
     cls, init_args, method = ndframe_method
     ndframe = cls(*init_args)
diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py
index 97be46f716d7d..80c24c647bc11 100644
--- a/pandas/tests/generic/test_label_or_level_utils.py
+++ b/pandas/tests/generic/test_label_or_level_utils.py
@@ -34,15 +34,6 @@ def df_ambig(df):
     return df


-@pytest.fixture
-def df_duplabels(df):
-    """DataFrame with level 'L1' and labels 'L2', 'L3', and 'L2'"""
-    df = df.set_index(["L1"])
-    df = pd.concat([df, df["L2"]], axis=1)
-
-    return df
-
-
 # Test is label/level reference
 # =============================
 def get_labels_levels(df_levels):
@@ -229,7 +220,9 @@ def test_get_label_or_level_values_df_ambig(df_ambig, axis):
     assert_label_values(df_ambig, ["L3"], axis=axis)


-def test_get_label_or_level_values_df_duplabels(df_duplabels, axis):
+def test_get_label_or_level_values_df_duplabels(df, axis):
+    df = df.set_index(["L1"])
+    df_duplabels = pd.concat([df, df["L2"]], axis=1)
     axis = df_duplabels._get_axis_number(axis)
     # Transpose frame if axis == 1
     if axis == 1:
diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py
index f27e99809176c..e73fb15a54181 100644
--- a/pandas/tests/groupby/methods/test_describe.py
+++ b/pandas/tests/groupby/methods/test_describe.py
@@ -226,49 +226,34 @@ def test_describe_duplicate_columns():
     tm.assert_frame_equal(result, expected)


-class TestGroupByNonCythonPaths:
+def test_describe_non_cython_paths():
     # GH#5610 non-cython calls should not include the grouper
     # Tests for code not expected to go through cython paths.
+    df = DataFrame(
+        [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]],
+        columns=["A", "B", "C"],
+    )
+    gb = df.groupby("A")
+    expected_index = Index([1, 3], name="A")
+    expected_col = MultiIndex(
+        levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]],
+        codes=[[0] * 8, list(range(8))],
+    )
+    expected = DataFrame(
+        [
+            [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0],
+            [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
+        ],
+        index=expected_index,
+        columns=expected_col,
+    )
+    result = gb.describe()
+    tm.assert_frame_equal(result, expected)

-    @pytest.fixture
-    def df(self):
-        df = DataFrame(
-            [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]],
-            columns=["A", "B", "C"],
-        )
-        return df
-
-    @pytest.fixture
-    def gb(self, df):
-        gb = df.groupby("A")
-        return gb
-
-    @pytest.fixture
-    def gni(self, df):
-        gni = df.groupby("A", as_index=False)
-        return gni
-
-    def test_describe(self, df, gb, gni):
-        # describe
-        expected_index = Index([1, 3], name="A")
-        expected_col = MultiIndex(
-            levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]],
-            codes=[[0] * 8, list(range(8))],
-        )
-        expected = DataFrame(
-            [
-                [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0],
-                [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
-            ],
-            index=expected_index,
-            columns=expected_col,
-        )
-        result = gb.describe()
-        tm.assert_frame_equal(result, expected)
-
-        expected = expected.reset_index()
-        result = gni.describe()
-        tm.assert_frame_equal(result, expected)
+    gni = df.groupby("A", as_index=False)
+    expected = expected.reset_index()
+    result = gni.describe()
+    tm.assert_frame_equal(result, expected)


 @pytest.mark.parametrize("dtype", [int, float, object])
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index 5c9f7febe32b3..35f2f47f5ff97 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -419,14 +419,6 @@ def test_compound(
     tm.assert_frame_equal(result, expected)


-@pytest.fixture
-def animals_df():
-    return DataFrame(
-        {"key": [1, 1, 1, 1], "num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
-        index=["falcon", "dog", "cat", "ant"],
-    )
-
-
 @pytest.mark.parametrize(
     "sort, ascending, normalize, name, expected_data, expected_index",
     [
@@ -444,10 +436,14 @@ def animals_df():
     ],
 )
 def test_data_frame_value_counts(
-    animals_df, sort, ascending, normalize, name, expected_data, expected_index
+    sort, ascending, normalize, name, expected_data, expected_index
 ):
     # 3-way compare with :meth:`~DataFrame.value_counts`
     # Tests from frame/methods/test_value_counts.py
+    animals_df = DataFrame(
+        {"key": [1, 1, 1, 1], "num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
     result_frame = animals_df.value_counts(
         sort=sort, ascending=ascending, normalize=normalize
     )
@@ -467,19 +463,6 @@ def test_data_frame_value_counts(
     tm.assert_series_equal(result_frame_groupby, expected)


-@pytest.fixture
-def nulls_df():
-    n = np.nan
-    return DataFrame(
-        {
-            "A": [1, 1, n, 4, n, 6, 6, 6, 6],
-            "B": [1, 1, 3, n, n, 6, 6, 6, 6],
-            "C": [1, 2, 3, 4, 5, 6, n, 8, n],
-            "D": [1, 2, 3, 4, 5, 6, 7, n, n],
-        }
-    )
-
-
 @pytest.mark.parametrize(
     "group_dropna, count_dropna, expected_rows, expected_values",
     [
@@ -495,7 +478,7 @@ def nulls_df():
     ],
 )
 def test_dropna_combinations(
-    nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
+    group_dropna, count_dropna, expected_rows, expected_values, request
 ):
     if Version(np.__version__) >= Version("1.25") and not group_dropna:
         request.applymarker(
@@ -507,6 +490,14 @@ def test_dropna_combinations(
                 strict=False,
             )
         )
+    nulls_df = DataFrame(
+        {
+            "A": [1, 1, np.nan, 4, np.nan, 6, 6, 6, 6],
+            "B": [1, 1, 3, np.nan, np.nan, 6, 6, 6, 6],
+            "C": [1, 2, 3, 4, 5, 6, np.nan, 8, np.nan],
+            "D": [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan],
+        }
+    )
     gp = nulls_df.groupby(["A", "B"], dropna=group_dropna)
     result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna)
     columns = DataFrame()
@@ -517,17 +508,6 @@ def test_dropna_combinations(
     tm.assert_series_equal(result, expected)


-@pytest.fixture
-def names_with_nulls_df(nulls_fixture):
-    return DataFrame(
-        {
-            "key": [1, 1, 1, 1],
-            "first_name": ["John", "Anne", "John", "Beth"],
-            "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"],
-        },
-    )
-
-
 @pytest.mark.parametrize(
     "dropna, expected_data, expected_index",
     [
@@ -556,11 +536,18 @@ def names_with_nulls_df(nulls_fixture):
 )
 @pytest.mark.parametrize("normalize, name", [(False, "count"), (True, "proportion")])
 def test_data_frame_value_counts_dropna(
-    names_with_nulls_df, dropna, normalize, name, expected_data, expected_index
+    nulls_fixture, dropna, normalize, name, expected_data, expected_index
 ):
     # GH 41334
     # 3-way compare with :meth:`~DataFrame.value_counts`
     # Tests with nulls from frame/methods/test_value_counts.py
+    names_with_nulls_df = DataFrame(
+        {
+            "key": [1, 1, 1, 1],
+            "first_name": ["John", "Anne", "John", "Beth"],
+            "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"],
+        },
+    )
     result_frame = names_with_nulls_df.value_counts(dropna=dropna, normalize=normalize)
     expected = Series(
         data=expected_data,
diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py
index 4aaf3de9a23b2..743db7e70b14b 100644
--- a/pandas/tests/groupby/test_index_as_string.py
+++ b/pandas/tests/groupby/test_index_as_string.py
@@ -5,38 +5,6 @@
 import pandas._testing as tm


-@pytest.fixture(params=[["inner"], ["inner", "outer"]])
-def frame(request):
-    levels = request.param
-    df = pd.DataFrame(
-        {
-            "outer": ["a", "a", "a", "b", "b", "b"],
-            "inner": [1, 2, 3, 1, 2, 3],
-            "A": np.arange(6),
-            "B": ["one", "one", "two", "two", "one", "one"],
-        }
-    )
-    if levels:
-        df = df.set_index(levels)
-
-    return df
-
-
-@pytest.fixture()
-def series():
-    df = pd.DataFrame(
-        {
-            "outer": ["a", "a", "a", "b", "b", "b"],
-            "inner": [1, 2, 3, 1, 2, 3],
-            "A": np.arange(6),
-            "B": ["one", "one", "two", "two", "one", "one"],
-        }
-    )
-    s = df.set_index(["outer", "inner", "B"])["A"]
-
-    return s
-
-
 @pytest.mark.parametrize(
     "key_strs,groupers",
     [
@@ -46,7 +14,17 @@ def series():
         (["inner", "B"], [pd.Grouper(level="inner"), "B"]),  # Index and column
     ],
 )
-def test_grouper_index_level_as_string(frame, key_strs, groupers):
+@pytest.mark.parametrize("levels", [["inner"], ["inner", "outer"]])
+def test_grouper_index_level_as_string(levels, key_strs, groupers):
+    frame = pd.DataFrame(
+        {
+            "outer": ["a", "a", "a", "b", "b", "b"],
+            "inner": [1, 2, 3, 1, 2, 3],
+            "A": np.arange(6),
+            "B": ["one", "one", "two", "two", "one", "one"],
+        }
+    )
+    frame = frame.set_index(levels)
     if "B" not in key_strs or "outer" in frame.columns:
         result = frame.groupby(key_strs).mean(numeric_only=True)
         expected = frame.groupby(groupers).mean(numeric_only=True)
@@ -71,8 +49,17 @@ def test_grouper_index_level_as_string(frame, key_strs, groupers):
         ["B", "outer", "inner"],
     ],
 )
-def test_grouper_index_level_as_string_series(series, levels):
+def test_grouper_index_level_as_string_series(levels):
     # Compute expected result
+    df = pd.DataFrame(
+        {
+            "outer": ["a", "a", "a", "b", "b", "b"],
+            "inner": [1, 2, 3, 1, 2, 3],
+            "A": np.arange(6),
+            "B": ["one", "one", "two", "two", "one", "one"],
+        }
+    )
+    series = df.set_index(["outer", "inner", "B"])["A"]
     if isinstance(levels, list):
         groupers = [pd.Grouper(level=lv) for lv in levels]
     else:
diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py
index 664c52babac13..f839bf156ca00 100644
--- a/pandas/tests/groupby/test_indexing.py
+++ b/pandas/tests/groupby/test_indexing.py
@@ -118,15 +118,26 @@ def test_doc_examples():
     tm.assert_frame_equal(result, expected)


-@pytest.fixture()
-def multiindex_data():
+def test_multiindex():
+    # Test the multiindex mentioned as the use-case in the documentation
+
+    def _make_df_from_data(data):
+        rows = {}
+        for date in data:
+            for level in data[date]:
+                rows[(date, level[0])] = {"A": level[1], "B": level[2]}
+
+        df = pd.DataFrame.from_dict(rows, orient="index")
+        df.index.names = ("Date", "Item")
+        return df
+
     rng = np.random.default_rng(2)
     ndates = 100
     nitems = 20
     dates = pd.date_range("20130101", periods=ndates, freq="D")
     items = [f"item {i}" for i in range(nitems)]

-    data = {}
+    multiindex_data = {}
     for date in dates:
         nitems_for_date = nitems - rng.integers(0, 12)
         levels = [
@@ -134,28 +145,12 @@ def multiindex_data():
             for item in items[:nitems_for_date]
         ]
         levels.sort(key=lambda x: x[1])
-        data[date] = levels
-
-    return data
-
+        multiindex_data[date] = levels

-def _make_df_from_data(data):
-    rows = {}
-    for date in data:
-        for level in data[date]:
-            rows[(date, level[0])] = {"A": level[1], "B": level[2]}
-
-    df = pd.DataFrame.from_dict(rows, orient="index")
-    df.index.names = ("Date", "Item")
-    return df
-
-
-def test_multiindex(multiindex_data):
-    # Test the multiindex mentioned as the use-case in the documentation
     df = _make_df_from_data(multiindex_data)
     result = df.groupby("Date", as_index=False).nth(slice(3, -3))
-    sliced = {date: multiindex_data[date][3:-3] for date in multiindex_data}
+    sliced = {date: values[3:-3] for date, values in multiindex_data.items()}
     expected = _make_df_from_data(sliced)
     tm.assert_frame_equal(result, expected)
@@ -271,15 +266,11 @@ def test_step(step):
     tm.assert_frame_equal(result, expected)


-@pytest.fixture()
-def column_group_df():
-    return pd.DataFrame(
+def test_column_axis():
+    column_group_df = pd.DataFrame(
         [[0, 1, 2, 3, 4, 5, 6], [0, 0, 1, 0, 1, 0, 2]],
         columns=["A", "B", "C", "D", "E", "F", "G"],
     )
-
-
-def test_column_axis(column_group_df):
     msg = "DataFrame.groupby with axis=1"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         g = column_group_df.groupby(column_group_df.iloc[1], axis=1)
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 0b451ce73db89..738711019b5fd 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -67,19 +67,6 @@ def df_with_datetime_col():
     return df


-@pytest.fixture
-def df_with_timedelta_col():
-    df = DataFrame(
-        {
-            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
-            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
-            "c": range(9),
-            "d": datetime.timedelta(days=1),
-        }
-    )
-    return df
-
-
 @pytest.fixture
 def df_with_cat_col():
     df = DataFrame(
@@ -353,8 +340,15 @@ def test_groupby_raises_datetime_np(


 @pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "var"])
-def test_groupby_raises_timedelta(func, df_with_timedelta_col):
-    df = df_with_timedelta_col
+def test_groupby_raises_timedelta(func):
+    df = DataFrame(
+        {
+            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
+            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
+            "c": range(9),
+            "d": datetime.timedelta(days=1),
+        }
+    )
     gb = df.groupby(by="a")

     _call_and_check(
diff --git a/pandas/tests/indexes/datetimelike_/test_nat.py b/pandas/tests/indexes/datetimelike_/test_nat.py
index 50cf29d016355..3dd0ce1cbd637 100644
--- a/pandas/tests/indexes/datetimelike_/test_nat.py
+++ b/pandas/tests/indexes/datetimelike_/test_nat.py
@@ -10,44 +10,33 @@
 import pandas._testing as tm


-class NATests:
-    def test_nat(self, index_without_na):
-        empty_index = index_without_na[:0]
-
-        index_with_na = index_without_na.copy(deep=True)
-        index_with_na._data[1] = NaT
-
-        assert empty_index._na_value is NaT
-        assert index_with_na._na_value is NaT
-        assert index_without_na._na_value is NaT
-
-        idx = index_without_na
-        assert idx._can_hold_na
-
-        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
-        assert idx.hasnans is False
-
-        idx = index_with_na
-        assert idx._can_hold_na
-
-        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
-        assert idx.hasnans is True
+@pytest.mark.parametrize(
+    "index_without_na",
+    [
+        TimedeltaIndex(["1 days", "2 days"]),
+        PeriodIndex(["2011-01-01", "2011-01-02"], freq="D"),
+        DatetimeIndex(["2011-01-01", "2011-01-02"]),
+        DatetimeIndex(["2011-01-01", "2011-01-02"], tz="UTC"),
+    ],
+)
+def test_nat(index_without_na):
+    empty_index = index_without_na[:0]

+    index_with_na = index_without_na.copy(deep=True)
+    index_with_na._data[1] = NaT

-class TestDatetimeIndexNA(NATests):
-    @pytest.fixture
-    def index_without_na(self, tz_naive_fixture):
-        tz = tz_naive_fixture
-        return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
+    assert empty_index._na_value is NaT
+    assert index_with_na._na_value is NaT
+    assert index_without_na._na_value is NaT

+    idx = index_without_na
+    assert idx._can_hold_na

-class TestTimedeltaIndexNA(NATests):
-    @pytest.fixture
-    def index_without_na(self):
-        return TimedeltaIndex(["1 days", "2 days"])
+    tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
+    assert idx.hasnans is False

+    idx = index_with_na
+    assert idx._can_hold_na

-class TestPeriodIndexNA(NATests):
-    @pytest.fixture
-    def index_without_na(self):
-        return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
+    tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
+    assert idx.hasnans is True
diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py
index 778c07b46e57c..e9864723f026e 100644
--- a/pandas/tests/indexes/interval/test_constructors.py
+++ b/pandas/tests/indexes/interval/test_constructors.py
@@ -23,11 +23,6 @@
 import pandas.core.common as com


-@pytest.fixture(params=[None, "foo"])
-def name(request):
-    return request.param
-
-
 class ConstructorTests:
     """
     Common tests for all variations of IntervalIndex construction. Input data
     get_kwargs_from_breaks to the expected format.
""" - @pytest.fixture( - params=[ + @pytest.mark.parametrize( + "breaks_and_expected_subtype", + [ ([3, 14, 15, 92, 653], np.int64), (np.arange(10, dtype="int64"), np.int64), (Index(np.arange(-10, 11, dtype=np.int64)), np.int64), @@ -48,11 +44,9 @@ class ConstructorTests: "datetime64[ns, US/Eastern]", ), (timedelta_range("1 day", periods=10), "" - assert repr(offset2) == "<2 * CustomBusinessMonthBegins>" + def test_repr(self): + assert repr(CBMonthBegin()) == "" + assert repr(CBMonthBegin(2)) == "<2 * CustomBusinessMonthBegins>" - def test_add_datetime(self, dt, offset2): - assert offset2 + dt == datetime(2008, 3, 3) + def test_add_datetime(self, dt): + assert CBMonthBegin(2) + dt == datetime(2008, 3, 3) def testRollback1(self): assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) @@ -252,30 +240,18 @@ def test_apply_with_extra_offset(self, case): class TestCustomBusinessMonthEnd: - @pytest.fixture - def _offset(self): - return CBMonthEnd - - @pytest.fixture - def offset(self): - return CBMonthEnd() - - @pytest.fixture - def offset2(self): - return CBMonthEnd(2) - - def test_different_normalize_equals(self, _offset): + def test_different_normalize_equals(self): # GH#21404 changed __eq__ to return False when `normalize` does not match - offset = _offset() - offset2 = _offset(normalize=True) + offset = CBMonthEnd() + offset2 = CBMonthEnd(normalize=True) assert offset != offset2 - def test_repr(self, offset, offset2): - assert repr(offset) == "" - assert repr(offset2) == "<2 * CustomBusinessMonthEnds>" + def test_repr(self): + assert repr(CBMonthEnd()) == "" + assert repr(CBMonthEnd(2)) == "<2 * CustomBusinessMonthEnds>" - def test_add_datetime(self, dt, offset2): - assert offset2 + dt == datetime(2008, 2, 29) + def test_add_datetime(self, dt): + assert CBMonthEnd(2) + dt == datetime(2008, 2, 29) def testRollback1(self): assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) diff --git a/pandas/tests/util/test_assert_produces_warning.py b/pandas/tests/util/test_assert_produces_warning.py index 5c27a3ee79d4a..88e9f0d8fccee 100644 --- a/pandas/tests/util/test_assert_produces_warning.py +++ b/pandas/tests/util/test_assert_produces_warning.py @@ -13,26 +13,6 @@ import pandas._testing as tm -@pytest.fixture( - params=[ - RuntimeWarning, - ResourceWarning, - UserWarning, - FutureWarning, - DeprecationWarning, - PerformanceWarning, - DtypeWarning, - ], -) -def category(request): - """ - Return unique warning. - - Useful for testing behavior of tm.assert_produces_warning with various categories. - """ - return request.param - - @pytest.fixture( params=[ (RuntimeWarning, UserWarning), @@ -73,6 +53,18 @@ def test_assert_produces_warning_honors_filter(): f() +@pytest.mark.parametrize( + "category", + [ + RuntimeWarning, + ResourceWarning, + UserWarning, + FutureWarning, + DeprecationWarning, + PerformanceWarning, + DtypeWarning, + ], +) @pytest.mark.parametrize( "message, match", [