From 9f9a64327a9cff6dd9ba0642500745691c631eb7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 23 Dec 2023 15:08:26 -0800 Subject: [PATCH 01/12] Remove conftest in indexing --- pandas/tests/indexing/conftest.py | 127 -------------------------- pandas/tests/indexing/test_iloc.py | 15 ++- pandas/tests/indexing/test_loc.py | 131 +++++++++++++-------------- pandas/tests/indexing/test_scalar.py | 38 +++++--- 4 files changed, 99 insertions(+), 212 deletions(-) delete mode 100644 pandas/tests/indexing/conftest.py diff --git a/pandas/tests/indexing/conftest.py b/pandas/tests/indexing/conftest.py deleted file mode 100644 index 4184c6a0047cc..0000000000000 --- a/pandas/tests/indexing/conftest.py +++ /dev/null @@ -1,127 +0,0 @@ -import numpy as np -import pytest - -from pandas import ( - DataFrame, - Index, - MultiIndex, - Series, - date_range, -) - - -@pytest.fixture -def series_ints(): - return Series(np.random.default_rng(2).random(4), index=np.arange(0, 8, 2)) - - -@pytest.fixture -def frame_ints(): - return DataFrame( - np.random.default_rng(2).standard_normal((4, 4)), - index=np.arange(0, 8, 2), - columns=np.arange(0, 12, 3), - ) - - -@pytest.fixture -def series_uints(): - return Series( - np.random.default_rng(2).random(4), - index=Index(np.arange(0, 8, 2, dtype=np.uint64)), - ) - - -@pytest.fixture -def frame_uints(): - return DataFrame( - np.random.default_rng(2).standard_normal((4, 4)), - index=Index(range(0, 8, 2), dtype=np.uint64), - columns=Index(range(0, 12, 3), dtype=np.uint64), - ) - - -@pytest.fixture -def series_labels(): - return Series(np.random.default_rng(2).standard_normal(4), index=list("abcd")) - - -@pytest.fixture -def frame_labels(): - return DataFrame( - np.random.default_rng(2).standard_normal((4, 4)), - index=list("abcd"), - columns=list("ABCD"), - ) - - -@pytest.fixture -def series_ts(): - return Series( - np.random.default_rng(2).standard_normal(4), - index=date_range("20130101", periods=4), - ) - - -@pytest.fixture -def frame_ts(): - return DataFrame( - np.random.default_rng(2).standard_normal((4, 4)), - index=date_range("20130101", periods=4), - ) - - -@pytest.fixture -def series_floats(): - return Series( - np.random.default_rng(2).random(4), - index=Index(range(0, 8, 2), dtype=np.float64), - ) - - -@pytest.fixture -def frame_floats(): - return DataFrame( - np.random.default_rng(2).standard_normal((4, 4)), - index=Index(range(0, 8, 2), dtype=np.float64), - columns=Index(range(0, 12, 3), dtype=np.float64), - ) - - -@pytest.fixture -def series_mixed(): - return Series(np.random.default_rng(2).standard_normal(4), index=[2, 4, "null", 8]) - - -@pytest.fixture -def frame_mixed(): - return DataFrame( - np.random.default_rng(2).standard_normal((4, 4)), index=[2, 4, "null", 8] - ) - - -@pytest.fixture -def frame_empty(): - return DataFrame() - - -@pytest.fixture -def series_empty(): - return Series(dtype=object) - - -@pytest.fixture -def frame_multi(): - return DataFrame( - np.random.default_rng(2).standard_normal((4, 4)), - index=MultiIndex.from_product([[1, 2], [3, 4]]), - columns=MultiIndex.from_product([[5, 6], [7, 8]]), - ) - - -@pytest.fixture -def series_multi(): - return Series( - np.random.default_rng(2).random(4), - index=MultiIndex.from_product([[1, 2], [3, 4]]), - ) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 409eca42f404b..36f00a3ce7cd7 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -39,13 +39,18 @@ class TestiLoc: @pytest.mark.parametrize("key", [2, -1, [0, 1, 2]]) - @pytest.mark.parametrize("kind", ["series", "frame"]) @pytest.mark.parametrize( - "col", - ["labels", "mixed", "ts", "floats", "empty"], + "index", + [ + Index(list("abcd"), dtype=object), + Index([2, 4, "null", 8], dtype=object), + date_range("20130101", periods=4), + Index(range(0, 8, 2), dtype=np.float64), + Index([]), + ], ) - def test_iloc_getitem_int_and_list_int(self, key, kind, col, request): - obj = request.getfixturevalue(f"{kind}_{col}") + def test_iloc_getitem_int_and_list_int(self, key, frame_or_series, index, request): + obj = frame_or_series(range(len(index)), index=index) check_indexing_smoketest_or_raises( obj, "iloc", diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index fb0adc56c401b..60fa319001c3b 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -69,116 +69,111 @@ def test_none_values_on_string_columns(self): assert df.loc[2, "a"] is None - @pytest.mark.parametrize("kind", ["series", "frame"]) - def test_loc_getitem_int(self, kind, request): + def test_loc_getitem_int(self, frame_or_series): # int label - obj = request.getfixturevalue(f"{kind}_labels") + obj = frame_or_series(range(3), index=Index(list("abc"), dtype=object)) check_indexing_smoketest_or_raises(obj, "loc", 2, fails=KeyError) - @pytest.mark.parametrize("kind", ["series", "frame"]) - def test_loc_getitem_label(self, kind, request): + def test_loc_getitem_label(self, frame_or_series): # label - obj = request.getfixturevalue(f"{kind}_empty") + obj = frame_or_series() check_indexing_smoketest_or_raises(obj, "loc", "c", fails=KeyError) + @pytest.mark.parametrize("key", ["f", 20]) @pytest.mark.parametrize( - "key, typs, axes", + "index", [ - ["f", ["ints", "uints", "labels", "mixed", "ts"], None], - ["f", ["floats"], None], - [20, ["ints", "uints", "mixed"], None], - [20, ["labels"], None], - [20, ["ts"], 0], - [20, ["floats"], 0], + Index(list("abcd"), dtype=object), + Index([2, 4, "null", 8], dtype=object), + date_range("20130101", periods=4), + Index(range(0, 8, 2), dtype=np.float64), + Index([]), ], ) - @pytest.mark.parametrize("kind", ["series", "frame"]) - def test_loc_getitem_label_out_of_range(self, key, typs, axes, kind, request): - for typ in typs: - obj = request.getfixturevalue(f"{kind}_{typ}") - # out of range label - check_indexing_smoketest_or_raises( - obj, "loc", key, axes=axes, fails=KeyError - ) + def test_loc_getitem_label_out_of_range(self, key, index, frame_or_series): + obj = frame_or_series(range(len(index)), index=index) + # out of range label + check_indexing_smoketest_or_raises(obj, "loc", key, fails=KeyError) + + @pytest.mark.parametrize("key", [[0, 1, 2], [1, 3.0, "A"]]) + @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) + def test_loc_getitem_label_list(self, key, dtype, frame_or_series): + obj = frame_or_series(range(3), index=Index([0, 1, 2], dtype=dtype)) + # list of labels + check_indexing_smoketest_or_raises(obj, "loc", key, fails=KeyError) @pytest.mark.parametrize( - "key, typs", + "index", [ - [[0, 1, 2], ["ints", "uints", "floats"]], - [[1, 3.0, "A"], ["ints", "uints", "floats"]], + None, + Index([0, 1, 2], dtype=np.int64), + Index([0, 1, 2], dtype=np.uint64), + Index([0, 1, 2], dtype=np.float64), + MultiIndex.from_arrays([range(3), range(3)]), ], ) - @pytest.mark.parametrize("kind", ["series", "frame"]) - def test_loc_getitem_label_list(self, key, typs, kind, request): - for typ in typs: - obj = request.getfixturevalue(f"{kind}_{typ}") - # list of labels - check_indexing_smoketest_or_raises(obj, "loc", key, fails=KeyError) - @pytest.mark.parametrize( - "key, typs, axes", - [ - [[0, 1, 2], ["empty"], None], - [[0, 2, 10], ["ints", "uints", "floats"], 0], - [[3, 6, 7], ["ints", "uints", "floats"], 1], - # GH 17758 - MultiIndex and missing keys - [[(1, 3), (1, 4), (2, 5)], ["multi"], 0], - ], + "key", [[0, 1, 2], [0, 2, 10], [3, 6, 7], [(1, 3), (1, 4), (2, 5)]] ) - @pytest.mark.parametrize("kind", ["series", "frame"]) - def test_loc_getitem_label_list_with_missing(self, key, typs, axes, kind, request): - for typ in typs: - obj = request.getfixturevalue(f"{kind}_{typ}") - check_indexing_smoketest_or_raises( - obj, "loc", key, axes=axes, fails=KeyError - ) + def test_loc_getitem_label_list_with_missing(self, key, index, frame_or_series): + if index is None: + obj = frame_or_series() + else: + obj = frame_or_series(range(len(index)), index=index) + check_indexing_smoketest_or_raises(obj, "loc", key, fails=KeyError) - @pytest.mark.parametrize("typs", ["ints", "uints"]) - @pytest.mark.parametrize("kind", ["series", "frame"]) - def test_loc_getitem_label_list_fails(self, typs, kind, request): + @pytest.mark.parametrize("dtype", [np.int64, np.uint64]) + def test_loc_getitem_label_list_fails(self, dtype, frame_or_series): # fails - obj = request.getfixturevalue(f"{kind}_{typs}") + obj = frame_or_series(range(3), Index([0, 1, 2], dtype=dtype)) check_indexing_smoketest_or_raises( obj, "loc", [20, 30, 40], axes=1, fails=KeyError ) - def test_loc_getitem_label_array_like(self): - # TODO: test something? - # array like - pass - - @pytest.mark.parametrize("kind", ["series", "frame"]) - def test_loc_getitem_bool(self, kind, request): - obj = request.getfixturevalue(f"{kind}_empty") + def test_loc_getitem_bool(self, frame_or_series): + obj = frame_or_series() # boolean indexers b = [True, False, True, False] check_indexing_smoketest_or_raises(obj, "loc", b, fails=IndexError) @pytest.mark.parametrize( - "slc, typs, axes, fails", + "slc, indexes, axes, fails", [ [ slice(1, 3), - ["labels", "mixed", "empty", "ts", "floats"], + [ + Index(list("abcd"), dtype=object), + Index([2, 4, "null", 8], dtype=object), + None, + date_range("20130101", periods=4), + Index(range(0, 12, 3), dtype=np.float64), + ], None, TypeError, ], - [slice("20130102", "20130104"), ["ts"], 1, TypeError], - [slice(2, 8), ["mixed"], 0, TypeError], - [slice(2, 8), ["mixed"], 1, KeyError], - [slice(2, 4, 2), ["mixed"], 0, TypeError], + [ + slice("20130102", "20130104"), + [date_range("20130101", periods=4)], + 1, + TypeError, + ], + [slice(2, 8), [Index([2, 4, "null", 8], dtype=object)], 0, TypeError], + [slice(2, 8), [Index([2, 4, "null", 8], dtype=object)], 1, KeyError], + [slice(2, 4, 2), [Index([2, 4, "null", 8], dtype=object)], 0, TypeError], ], ) - @pytest.mark.parametrize("kind", ["series", "frame"]) - def test_loc_getitem_label_slice(self, slc, typs, axes, fails, kind, request): + def test_loc_getitem_label_slice(self, slc, indexes, axes, fails, frame_or_series): # label slices (with ints) # real label slices # GH 14316 - for typ in typs: - obj = request.getfixturevalue(f"{kind}_{typ}") + for index in indexes: + if index is None: + obj = frame_or_series() + else: + obj = frame_or_series(range(len(index)), index=index) check_indexing_smoketest_or_raises( obj, "loc", diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 29e3dc0aebe95..ef4cd402aaf24 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -10,6 +10,7 @@ from pandas import ( DataFrame, + Index, Series, Timedelta, Timestamp, @@ -32,29 +33,42 @@ def generate_indices(f, values=False): class TestScalar: - @pytest.mark.parametrize("kind", ["series", "frame"]) - @pytest.mark.parametrize("col", ["ints", "uints"]) - def test_iat_set_ints(self, kind, col, request): - f = request.getfixturevalue(f"{kind}_{col}") + @pytest.mark.parametrize("dtype", [np.int64, np.uint64]) + def test_iat_set_ints(self, dtype, frame_or_series): + f = frame_or_series(range(3), index=Index([0, 1, 2], dtype=dtype)) indices = generate_indices(f, True) for i in indices: f.iat[i] = 1 expected = f.values[i] tm.assert_almost_equal(expected, 1) - @pytest.mark.parametrize("kind", ["series", "frame"]) - @pytest.mark.parametrize("col", ["labels", "ts", "floats"]) - def test_iat_set_other(self, kind, col, request): - f = request.getfixturevalue(f"{kind}_{col}") + @pytest.mark.parametrize( + "index", + [ + Index(list("abcd"), dtype=object), + date_range("20130101", periods=4), + Index(range(0, 8, 2), dtype=np.float64), + ], + ) + def test_iat_set_other(self, index, frame_or_series): + f = frame_or_series(range(len(index)), index=index) msg = "iAt based indexing can only have integer indexers" with pytest.raises(ValueError, match=msg): idx = next(generate_indices(f, False)) f.iat[idx] = 1 - @pytest.mark.parametrize("kind", ["series", "frame"]) - @pytest.mark.parametrize("col", ["ints", "uints", "labels", "ts", "floats"]) - def test_at_set_ints_other(self, kind, col, request): - f = request.getfixturevalue(f"{kind}_{col}") + @pytest.mark.parametrize( + "index", + [ + Index(list("abcd"), dtype=object), + date_range("20130101", periods=4), + Index(range(0, 8, 2), dtype=np.float64), + Index(range(0, 8, 2), dtype=np.uint64), + Index(range(0, 8, 2), dtype=np.int64), + ], + ) + def test_at_set_ints_other(self, index, frame_or_series): + f = frame_or_series(range(len(index)), index=index) indices = generate_indices(f, False) for i in indices: f.at[i] = 1 From e4f81bb3aff832bc1f23a109624b792e5f08f6e2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 23 Dec 2023 17:42:25 -0800 Subject: [PATCH 02/12] Use less idx fixture in multi --- pandas/tests/indexes/multi/test_analytics.py | 17 +++++---- pandas/tests/indexes/multi/test_astype.py | 7 ++-- pandas/tests/indexes/multi/test_compat.py | 6 ++-- .../tests/indexes/multi/test_constructors.py | 3 +- pandas/tests/indexes/multi/test_conversion.py | 3 +- pandas/tests/indexes/multi/test_copy.py | 9 +++-- pandas/tests/indexes/multi/test_duplicates.py | 10 ++++-- .../tests/indexes/multi/test_equivalence.py | 21 ++++++----- pandas/tests/indexes/multi/test_formats.py | 6 ++-- pandas/tests/indexes/multi/test_get_set.py | 3 +- pandas/tests/indexes/multi/test_integrity.py | 17 +++++---- pandas/tests/indexes/multi/test_join.py | 3 +- pandas/tests/indexes/multi/test_missing.py | 10 +++--- pandas/tests/indexes/multi/test_reindex.py | 10 ++++-- pandas/tests/indexes/multi/test_reshape.py | 18 +++++++--- pandas/tests/indexes/multi/test_setops.py | 36 +++++++++++++------ pandas/tests/indexes/multi/test_sorting.py | 31 ++++------------ pandas/tests/indexes/multi/test_take.py | 6 +++- 18 files changed, 133 insertions(+), 83 deletions(-) diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 87f1439db5fc8..a0e7510fbe2dc 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -11,13 +11,15 @@ import pandas._testing as tm -def test_infer_objects(idx): +def test_infer_objects(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) with pytest.raises(NotImplementedError, match="to_frame"): idx.infer_objects() -def test_shift(idx): +def test_shift(): # GH8083 test the base class for shift + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = ( "This method is only implemented for DatetimeIndex, PeriodIndex and " "TimedeltaIndex; Got type MultiIndex" @@ -76,8 +78,9 @@ def test_truncate_multiindex(): # TODO: reshape -def test_reorder_levels(idx): +def test_reorder_levels(): # this blows up + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) with pytest.raises(IndexError, match="^Too many levels"): idx.reorder_levels([2, 1, 0]) @@ -174,9 +177,9 @@ def test_sub(idx): first.tolist() - idx[-3:] -def test_map(idx): +def test_map(): # callable - index = idx + index = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) result = index.map(lambda x: x) tm.assert_index_equal(result, index) @@ -235,10 +238,11 @@ def test_map_dictlike(idx, mapper): ], ids=lambda func: func.__name__, ) -def test_numpy_ufuncs(idx, func): +def test_numpy_ufuncs(func): # test ufuncs of numpy. see: # https://numpy.org/doc/stable/reference/ufuncs.html + idx = MultiIndex(levels=[["A", "B"]], codes=[[0, 1]]) expected_exception = TypeError msg = ( "loop of ufunc does not support argument 0 of type tuple which " @@ -254,6 +258,7 @@ def test_numpy_ufuncs(idx, func): ids=lambda func: func.__name__, ) def test_numpy_type_funcs(idx, func): + idx = MultiIndex(levels=[["A", "B"]], codes=[[0, 1]]) msg = ( f"ufunc '{func.__name__}' not supported for the input types, and the inputs " "could not be safely coerced to any supported types according to " diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 29908537fbe59..1f9f8d91ad970 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -3,10 +3,12 @@ from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas import MultiIndex import pandas._testing as tm -def test_astype(idx): +def test_astype(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["foo"]) expected = idx.copy() actual = idx.astype("O") tm.assert_copy(actual.levels, expected.levels) @@ -18,7 +20,8 @@ def test_astype(idx): @pytest.mark.parametrize("ordered", [True, False]) -def test_astype_category(idx, ordered): +def test_astype_category(ordered): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) # GH 18630 msg = "> 1 ndim Categorical are not supported at this time" with pytest.raises(NotImplementedError, match=msg): diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 27a8c6e9b7158..4bc73f8272d08 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -6,7 +6,8 @@ import pandas._testing as tm -def test_numeric_compat(idx): +def test_numeric_compat(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) with pytest.raises(TypeError, match="cannot perform __mul__"): idx * 1 @@ -29,7 +30,8 @@ def test_numeric_compat(idx): @pytest.mark.parametrize("method", ["all", "any", "__invert__"]) -def test_logical_compat(idx, method): +def test_logical_compat(method): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = f"cannot perform {method}" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 8456e6a7acba5..0ad8831805087 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -385,7 +385,8 @@ def test_from_tuples_empty(): tm.assert_index_equal(result, expected) -def test_from_tuples_index_values(idx): +def test_from_tuples_index_values(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) result = MultiIndex.from_tuples(idx) assert (result.values == idx.values).all() diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 3c2ca045d6f99..0109d67cb7dde 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -9,7 +9,8 @@ import pandas._testing as tm -def test_to_numpy(idx): +def test_to_numpy(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) result = idx.to_numpy() exp = idx.values tm.assert_numpy_array_equal(result, exp) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 2e09a580f9528..504496ea527cc 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -26,19 +26,22 @@ def assert_multiindex_copied(copy, original): assert copy.sortorder == original.sortorder -def test_copy(idx): +def test_copy(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) i_copy = idx.copy() assert_multiindex_copied(i_copy, idx) -def test_shallow_copy(idx): +def test_shallow_copy(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) i_copy = idx._view() assert_multiindex_copied(i_copy, idx) -def test_view(idx): +def test_view(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) i_view = idx.view() assert_multiindex_copied(i_view, idx) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 6c6d9022b1af3..faf83407cfa50 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -145,13 +145,19 @@ def test_duplicate_meta_data(): assert idx.drop_duplicates().names == idx.names -def test_has_duplicates(idx, idx_dup): - # see fixtures +def test_has_duplicates(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) assert idx.is_unique is True assert idx.has_duplicates is False + + +def test_has_duplicates_with_dups(idx_dup): + # see fixtures assert idx_dup.is_unique is False assert idx_dup.has_duplicates is True + +def test_has_duplicates_other(): mi = MultiIndex( levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] ) diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 9babbd5b8d56d..079f6194c2ff9 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -12,7 +12,8 @@ import pandas._testing as tm -def test_equals(idx): +def test_equals(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.astype(object)) @@ -26,10 +27,6 @@ def test_equals(idx): assert idx.equals(same_values) assert same_values.equals(idx) - if idx.nlevels == 1: - # do not test MultiIndex - assert not idx.equals(Series(idx)) - def test_equals_op(idx): # GH9947, GH10637 @@ -132,7 +129,8 @@ def test_compare_tuple_strs(): tm.assert_numpy_array_equal(result, expected) -def test_equals_multi(idx): +def test_equals_multi(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) assert idx.equals(idx) assert not idx.equals(idx.values) assert idx.equals(Index(idx.values)) @@ -141,6 +139,8 @@ def test_equals_multi(idx): assert not idx.equals(idx[:-1]) assert not idx.equals(idx[-1]) + +def test_equals_multi_different_levels(idx): # different number of levels index = MultiIndex( levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))], @@ -181,7 +181,8 @@ def test_equals_multi(idx): assert not idx.equals(index) -def test_identical(idx): +def test_identical(): + idx = MultiIndex(levels=[[0, 1], [2, 3]], codes=[[0, 1], [0, 1]]) mi = idx.copy() mi2 = idx.copy() assert mi.identical(mi2) @@ -249,12 +250,14 @@ def test_is_(): assert not mi5.is_(mi) -def test_is_all_dates(idx): +def test_is_all_dates(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) assert not idx._is_all_dates -def test_is_numeric(idx): +def test_is_numeric(): # MultiIndex is never numeric + idx = MultiIndex(levels=[["A", "B"]], codes=[[0, 1]]) assert not is_any_real_numeric_dtype(idx) diff --git a/pandas/tests/indexes/multi/test_formats.py b/pandas/tests/indexes/multi/test_formats.py index 52ff3109128f2..b3988e824775b 100644 --- a/pandas/tests/indexes/multi/test_formats.py +++ b/pandas/tests/indexes/multi/test_formats.py @@ -9,8 +9,9 @@ import pandas._testing as tm -def test_format(idx): +def test_format(): msg = "MultiIndex.format is deprecated" + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) with tm.assert_produces_warning(FutureWarning, match=msg): idx.format() idx[:0].format() @@ -70,8 +71,9 @@ def test_unicode_string_with_unicode(): str(idx) -def test_repr_max_seq_item_setting(idx): +def test_repr_max_seq_item_setting(): # GH10182 + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) idx = idx.repeat(50) with pd.option_context("display.max_seq_items", None): repr(idx) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 6eeaeb6711d03..969cbf6fb8cd9 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -242,7 +242,8 @@ def test_set_codes(idx): assert result.equals(expected) -def test_set_levels_codes_names_bad_input(idx): +def test_set_levels_codes_names_bad_input(): + idx = MultiIndex(levels=[["A", "B"], ["B", "C"]], codes=[[0, 1], [0, 1]]) levels, codes = idx.levels, idx.codes names = idx.names diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index d956747cbc859..93262da50b0f6 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -163,10 +163,11 @@ def test_take_invalid_kwargs(): idx.take(indices, mode="clip") -def test_isna_behavior(idx): +def test_isna_behavior(): # should not segfault GH5123 # NOTE: if MI representation changes, may make sense to allow # isna(MI) + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "isna is not defined for MultiIndex" with pytest.raises(NotImplementedError, match=msg): pd.isna(idx) @@ -208,12 +209,14 @@ def test_mi_hashtable_populated_attribute_error(monkeypatch): df["a"].foo() -def test_can_hold_identifiers(idx): +def test_can_hold_identifiers(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) key = idx[0] assert idx._can_hold_identifiers_and_holds_name(key) is True -def test_metadata_immutable(idx): +def test_metadata_immutable(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) levels, codes = idx.levels, idx.codes # shouldn't be able to set at either the top level or base level mutable_regex = re.compile("does not support mutable operations") @@ -265,7 +268,8 @@ def test_rangeindex_fallback_coercion_bug(): tm.assert_index_equal(result, expected) -def test_memory_usage(idx): +def test_memory_usage(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) result = idx.memory_usage() if len(idx): idx.get_loc(idx[0]) @@ -285,5 +289,6 @@ def test_memory_usage(idx): assert result == 0 -def test_nlevels(idx): - assert idx.nlevels == 2 +def test_nlevels(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) + assert idx.nlevels == 1 diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index edd0feaaa1159..1b88ef790723c 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -50,7 +50,8 @@ def test_join_level_corner_case(idx): idx.join(idx, level=1) -def test_join_self(idx, join_type): +def test_join_self(join_type): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) result = idx.join(idx, how=join_type) expected = idx if join_type == "outer": diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 14ffc42fb4b59..5e3c545f6a35f 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -6,8 +6,9 @@ import pandas._testing as tm -def test_fillna(idx): +def test_fillna(): # GH 11343 + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "isna is not defined for MultiIndex" with pytest.raises(NotImplementedError, match=msg): idx.fillna(idx[0]) @@ -53,18 +54,19 @@ def test_dropna(): tm.assert_index_equal(idx.dropna(how="all"), expected) -def test_nulls(idx): +def test_nulls(): # this is really a smoke test for the methods # as these are adequately tested for function elsewhere - + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "isna is not defined for MultiIndex" with pytest.raises(NotImplementedError, match=msg): idx.isna() @pytest.mark.xfail(reason="isna is not defined for MultiIndex") -def test_hasnans_isnans(idx): +def test_hasnans_isnans(): # GH 11343, added tests for hasnans / isnans + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) index = idx.copy() # cases in indices doesn't include NaN diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index d1b4fe8b98760..007480db1de1b 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -9,7 +9,12 @@ import pandas._testing as tm -def test_reindex(idx): +def test_reindex(): + idx = MultiIndex( + levels=[list(range(5)), list(range(1, 6))], + codes=[list(range(5)), list(range(5))], + names=["first", "second"], + ) result, indexer = idx.reindex(list(idx[:4])) assert isinstance(result, MultiIndex) assert result.names == ["first", "second"] @@ -92,7 +97,8 @@ def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array( assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype -def test_reindex_base(idx): +def test_reindex_base(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) expected = np.arange(idx.size, dtype=np.intp) actual = idx.get_indexer(idx) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 06dbb33aadf97..13d5bd1ae8d65 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -12,7 +12,12 @@ import pandas._testing as tm -def test_insert(idx): +def test_insert(): + idx = MultiIndex( + levels=[["bar", "foo"], ["two", "one"]], + codes=[[0, 1], [0, 1]], + names=["first", "second"], + ) # key contained in all levels new_index = idx.insert(0, ("bar", "two")) assert new_index.equal_levels(idx) @@ -34,6 +39,8 @@ def test_insert(idx): with pytest.raises(ValueError, match=msg): idx.insert(0, ("foo2",)) + +def test_insert_reindex(): left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"]) left.set_index(["1st", "2nd"], inplace=True) ts = left["3rd"].copy(deep=True) @@ -90,7 +97,8 @@ def test_insert2(): tm.assert_series_equal(left, right) -def test_append(idx): +def test_append(): + idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) result = idx[:3].append(idx[3:]) assert result.equals(idx) @@ -201,14 +209,16 @@ def test_repeat(): tm.assert_index_equal(m.repeat(reps), expected) -def test_insert_base(idx): +def test_insert_base(): + idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) result = idx[1:4] # test 0th element assert idx[0:4].equals(result.insert(0, idx[0])) -def test_delete_base(idx): +def test_delete_base(): + idx = MultiIndex(levels=[list(range(6))], codes=[list(range(6))]) expected = idx[1:] result = idx.delete(0) assert result.equals(expected) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 0abb56ecf9de7..025381616743a 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -21,15 +21,17 @@ @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] ) -def test_set_ops_error_cases(idx, case, sort, method): +def test_set_ops_error_cases(case, sort, method): # non-iterable input + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "Input must be Index or array-like" with pytest.raises(TypeError, match=msg): getattr(idx, method)(case, sort=sort) @pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) -def test_intersection_base(idx, sort, klass): +def test_intersection_base(sort, klass): + idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) first = idx[2::-1] # first 3 elements reversed second = idx[:5] @@ -50,7 +52,8 @@ def test_intersection_base(idx, sort, klass): @pytest.mark.arm_slow @pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) -def test_union_base(idx, sort, klass): +def test_union_base(sort, klass): + idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) first = idx[::-1] second = idx[:5] @@ -69,7 +72,8 @@ def test_union_base(idx, sort, klass): first.union([1, 2, 3], sort=sort) -def test_difference_base(idx, sort): +def test_difference_base(sort): + idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) second = idx[4:] answer = idx[:4] result = idx.difference(second, sort=sort) @@ -91,7 +95,8 @@ def test_difference_base(idx, sort): idx.difference([1, 2, 3], sort=sort) -def test_symmetric_difference(idx, sort): +def test_symmetric_difference(sort): + idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) first = idx[1:] second = idx[:-1] answer = idx[[-1, 0]] @@ -124,13 +129,18 @@ def test_multiindex_symmetric_difference(): assert result.names == [None, None] -def test_empty(idx): +def test_empty(): # GH 15270 + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) assert not idx.empty assert idx[:0].empty -def test_difference(idx, sort): +def test_difference(sort): + idx = MultiIndex( + levels=[list(range(5)), list(range(1, 6))], + codes=[list(range(5)), list(range(5))], + ) first = idx result = first.difference(idx[-3:], sort=sort) vals = idx[:-3].values @@ -237,7 +247,8 @@ def test_difference_sort_incomparable_true(): idx.difference(other, sort=True) -def test_union(idx, sort): +def test_union(sort): + idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) piece1 = idx[:5][::-1] piece2 = idx[3:] @@ -282,7 +293,8 @@ def test_union_with_regular_index(idx, using_infer_string): assert not result.equals(result2) -def test_intersection(idx, sort): +def test_intersection(sort): + idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) piece1 = idx[:5][::-1] piece2 = idx[3:] @@ -310,7 +322,8 @@ def test_intersection(idx, sort): @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] ) -def test_setop_with_categorical(idx, sort, method): +def test_setop_with_categorical(sort, method): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) other = idx.to_flat_index().astype("category") res_names = [None] * idx.nlevels @@ -323,7 +336,8 @@ def test_setop_with_categorical(idx, sort, method): tm.assert_index_equal(result, expected) -def test_intersection_non_object(idx, sort): +def test_intersection_non_object(sort): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) other = Index(range(3), name="foo") result = idx.intersection(other, sort=sort) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index b4dcef71dcf50..4748016d2943e 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -7,11 +7,9 @@ ) from pandas import ( - CategoricalIndex, DataFrame, Index, MultiIndex, - RangeIndex, Series, Timestamp, ) @@ -19,7 +17,8 @@ from pandas.core.indexes.frozen import FrozenList -def test_sortlevel(idx): +def test_sortlevel(): + idx = MultiIndex(levels=[[0, 1], [1, 2]], codes=[[0, 1], [0, 1]]) tuples = list(idx) np.random.default_rng(2).shuffle(tuples) @@ -83,31 +82,12 @@ def test_sortlevel_na_position(): tm.assert_index_equal(result, expected) -def test_numpy_argsort(idx): +def test_numpy_argsort(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) result = np.argsort(idx) expected = idx.argsort() tm.assert_numpy_array_equal(result, expected) - # these are the only two types that perform - # pandas compatibility input validation - the - # rest already perform separate (or no) such - # validation via their 'values' attribute as - # defined in pandas.core.indexes/base.py - they - # cannot be changed at the moment due to - # backwards compatibility concerns - if isinstance(type(idx), (CategoricalIndex, RangeIndex)): - msg = "the 'axis' parameter is not supported" - with pytest.raises(ValueError, match=msg): - np.argsort(idx, axis=1) - - msg = "the 'kind' parameter is not supported" - with pytest.raises(ValueError, match=msg): - np.argsort(idx, kind="mergesort") - - msg = "the 'order' parameter is not supported" - with pytest.raises(ValueError, match=msg): - np.argsort(idx, order=("a", "b")) - def test_unsortedindex(): # GH 11897 @@ -277,7 +257,8 @@ def test_remove_unused_nan(level0, level1): assert "unused" not in result.levels[level] -def test_argsort(idx): +def test_argsort(): + idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) result = idx.argsort() expected = idx.values.argsort() tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_take.py b/pandas/tests/indexes/multi/test_take.py index 543cba25c373b..6ae927137a304 100644 --- a/pandas/tests/indexes/multi/test_take.py +++ b/pandas/tests/indexes/multi/test_take.py @@ -11,13 +11,17 @@ def test_take(idx): expected = idx[indexer] assert result.equals(expected) + +def test_freq(): # GH 10791 + idx = pd.MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "'MultiIndex' object has no attribute 'freq'" with pytest.raises(AttributeError, match=msg): idx.freq -def test_take_invalid_kwargs(idx): +def test_take_invalid_kwargs(): + idx = pd.MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" From 29e7cd570ca46d4618492bec4937153afed4fba4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 23 Dec 2023 21:27:48 -0800 Subject: [PATCH 03/12] Remove other seldom used fixtures --- pandas/tests/arithmetic/test_numeric.py | 21 +----- pandas/tests/dtypes/test_inference.py | 7 +- pandas/tests/frame/indexing/test_indexing.py | 34 +++------ pandas/tests/frame/methods/test_join.py | 26 ++----- pandas/tests/frame/methods/test_nlargest.py | 34 ++++----- pandas/tests/frame/test_subclass.py | 21 +++--- pandas/tests/frame/test_validate.py | 8 +-- pandas/tests/generic/test_finalize.py | 9 +-- .../generic/test_label_or_level_utils.py | 13 +--- pandas/tests/groupby/methods/test_describe.py | 65 +++++++---------- .../groupby/methods/test_value_counts.py | 57 ++++++--------- pandas/tests/groupby/test_all_methods.py | 69 +++++++++---------- pandas/tests/groupby/test_index_as_string.py | 53 ++++++-------- pandas/tests/groupby/test_indexing.py | 43 +++++------- pandas/tests/groupby/test_raises.py | 24 +++---- .../tests/indexes/datetimelike_/test_nat.py | 59 +++++++--------- .../indexes/interval/test_constructors.py | 29 +++----- .../indexes/interval/test_interval_tree.py | 6 +- pandas/tests/indexes/numeric/test_indexing.py | 11 ++- pandas/tests/indexes/numeric/test_numeric.py | 29 +++----- pandas/tests/indexes/numeric/test_setops.py | 6 +- pandas/tests/indexing/multiindex/test_loc.py | 14 ++-- pandas/tests/interchange/test_impl.py | 34 ++++----- pandas/tests/io/test_orc.py | 27 +++----- pandas/tests/test_downstream.py | 48 ++++--------- 25 files changed, 274 insertions(+), 473 deletions(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index d8c1786b6b422..4e978a874b15d 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -37,14 +37,6 @@ def switch_numexpr_min_elements(request, monkeypatch): yield request.param -@pytest.fixture(params=[Index, Series, tm.to_array]) -def box_pandas_1d_array(request): - """ - Fixture to test behavior for Index, Series and tm.to_array classes - """ - return request.param - - @pytest.fixture( params=[ # TODO: add more dtypes here @@ -62,17 +54,6 @@ def numeric_idx(request): return request.param -@pytest.fixture( - params=[Index, Series, tm.to_array, np.array, list], ids=lambda x: x.__name__ -) -def box_1d_array(request): - """ - Fixture to test behavior for Index, Series, tm.to_array, numpy Array and list - classes - """ - return request.param - - def adjust_negative_zero(zero, expected): """ Helper to adjust the expected result if we are dividing by -0.0 @@ -1503,6 +1484,8 @@ def test_dataframe_div_silenced(): "data, expected_data", [([0, 1, 2], [0, 2, 4])], ) +@pytest.mark.parametrize("box_pandas_1d_array", [Index, Series, tm.to_array]) +@pytest.mark.parametrize("box_1d_array", [Index, Series, tm.to_array, np.array, list]) def test_integer_array_add_list_like( box_pandas_1d_array, box_1d_array, data, expected_data ): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 49eb06c299886..e58fd03b8e306 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1983,9 +1983,12 @@ def test_nan_to_nat_conversions(): @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") +@pytest.mark.parametrize("spmatrix", ["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) def test_is_scipy_sparse(spmatrix): - pytest.importorskip("scipy") - assert is_scipy_sparse(spmatrix([[0, 1]])) + sparse = pytest.importorskip("scipy.sparse") + + klass = getattr(sparse, spmatrix + "_matrix") + assert is_scipy_sparse(klass([[0, 1]])) assert not is_scipy_sparse(np.array([1])) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 97e7ae15c6c63..a43f32778a048 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1673,25 +1673,6 @@ def orig(self): orig = DataFrame({"cats": cats, "values": values}, index=idx) return orig - @pytest.fixture - def exp_single_row(self): - # The expected values if we change a single row - cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) - idx1 = Index(["h", "i", "j", "k", "l", "m", "n"]) - values1 = [1, 1, 2, 1, 1, 1, 1] - exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1) - return exp_single_row - - @pytest.fixture - def exp_multi_row(self): - # assign multiple rows (mixed values) (-> array) -> exp_multi_row - # changed multiple rows - cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) - idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) - values2 = [1, 1, 2, 2, 1, 1, 1] - exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2) - return exp_multi_row - @pytest.fixture def exp_parts_cats_col(self): # changed part of the cats column @@ -1713,7 +1694,7 @@ def exp_single_cats_value(self): return exp_single_cats_value @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) - def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer): + def test_loc_iloc_setitem_list_of_lists(self, orig, indexer): # - assign multiple rows (mixed values) -> exp_multi_row df = orig.copy() @@ -1722,6 +1703,11 @@ def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer): key = slice("j", "k") indexer(df)[key, :] = [["b", 2], ["b", 2]] + + cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values2 = [1, 1, 2, 2, 1, 1, 1] + exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2) tm.assert_frame_equal(df, exp_multi_row) df = orig.copy() @@ -1763,9 +1749,7 @@ def test_loc_iloc_setitem_mask_single_value_in_categories( tm.assert_frame_equal(df, exp_single_cats_value) @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) - def test_loc_iloc_setitem_full_row_non_categorical_rhs( - self, orig, exp_single_row, indexer - ): + def test_loc_iloc_setitem_full_row_non_categorical_rhs(self, orig, indexer): # - assign a complete row (mixed values) -> exp_single_row df = orig.copy() @@ -1775,6 +1759,10 @@ def test_loc_iloc_setitem_full_row_non_categorical_rhs( # not categorical dtype, but "b" _is_ among the categories for df["cat"] indexer(df)[key, :] = ["b", 2] + cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx1 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values1 = [1, 1, 2, 1, 1, 1, 1] + exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1) tm.assert_frame_equal(df, exp_single_row) # "c" is not among the categories for df["cat"] diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 735f6c50ab739..06c0f6ea3c5c0 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -17,25 +17,6 @@ from pandas.core.reshape.concat import concat -@pytest.fixture -def frame_with_period_index(): - return DataFrame( - data=np.arange(20).reshape(4, 5), - columns=list("abcde"), - index=period_range(start="2000", freq="Y", periods=4), - ) - - -@pytest.fixture -def left(): - return DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0]) - - -@pytest.fixture -def right(): - return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) - - @pytest.fixture def left_no_dup(): return DataFrame( @@ -113,6 +94,8 @@ def right_w_dups(right_no_dup): ], ) def test_join(left, right, how, sort, expected): + left = DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0]) + right = DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) result = left.join(right, how=how, sort=sort, validate="1:1") tm.assert_frame_equal(result, expected) @@ -348,6 +331,11 @@ def test_join_overlap(float_frame): def test_join_period_index(frame_with_period_index): + frame_with_period_index = DataFrame( + data=np.arange(20).reshape(4, 5), + columns=list("abcde"), + index=period_range(start="2000", freq="Y", periods=4), + ) other = frame_with_period_index.rename(columns=lambda key: f"{key}{key}") joined_values = np.concatenate([frame_with_period_index.values] * 2, axis=1) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 3ba893501914a..22227bbb62d39 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -12,25 +12,6 @@ from pandas.util.version import Version -@pytest.fixture -def df_duplicates(): - return pd.DataFrame( - {"a": [1, 2, 3, 4, 4], "b": [1, 1, 1, 1, 1], "c": [0, 1, 2, 5, 4]}, - index=[0, 0, 1, 1, 1], - ) - - -@pytest.fixture -def df_strings(): - return pd.DataFrame( - { - "a": np.random.default_rng(2).permutation(10), - "b": list(ascii_lowercase[:10]), - "c": np.random.default_rng(2).permutation(10).astype("float64"), - } - ) - - @pytest.fixture def df_main_dtypes(): return pd.DataFrame( @@ -83,7 +64,13 @@ class TestNLargestNSmallest: @pytest.mark.parametrize("n", range(1, 11)) def test_nlargest_n(self, df_strings, nselect_method, n, order): # GH#10393 - df = df_strings + df = pd.DataFrame( + { + "a": np.random.default_rng(2).permutation(10), + "b": list(ascii_lowercase[:10]), + "c": np.random.default_rng(2).permutation(10).astype("float64"), + } + ) if "b" in order: error_msg = ( f"Column 'b' has dtype (object|string), " @@ -156,10 +143,13 @@ def test_nlargest_n_identical_values(self): [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]], ) @pytest.mark.parametrize("n", range(1, 6)) - def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request): + def test_nlargest_n_duplicate_index(self, n, order, request): # GH#13412 - df = df_duplicates + df = pd.DataFrame( + {"a": [1, 2, 3, 4, 4], "b": [1, 1, 1, 1, 1], "c": [0, 1, 2, 5, 4]}, + index=[0, 0, 1, 1, 1], + ) result = df.nsmallest(n, order) expected = df.sort_values(order).head(n) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index ef78ae62cb4d6..cf4f2071a8aae 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -15,16 +15,6 @@ ) -@pytest.fixture() -def gpd_style_subclass_df(): - class SubclassedDataFrame(DataFrame): - @property - def _constructor(self): - return SubclassedDataFrame - - return SubclassedDataFrame({"a": [1, 2, 3]}) - - class TestDataFrameSubclassing: def test_frame_subclassing_and_slicing(self): # Subclass frame and ensure it returns the right class on slicing it @@ -716,8 +706,15 @@ def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df): result = df.convert_dtypes() assert isinstance(result, tm.SubclassedDataFrame) - result = gpd_style_subclass_df.convert_dtypes() - assert isinstance(result, type(gpd_style_subclass_df)) + def test_convert_dtypes_preserves_subclass_with_constructor(self): + class SubclassedDataFrame(DataFrame): + @property + def _constructor(self): + return SubclassedDataFrame + + df = SubclassedDataFrame({"a": [1, 2, 3]}) + result = df.convert_dtypes() + assert isinstance(result, SubclassedDataFrame) def test_astype_preserves_subclass(self): # GH#40810 diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index e99e0a6863848..fdeecba29a617 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -3,11 +3,6 @@ from pandas.core.frame import DataFrame -@pytest.fixture -def dataframe(): - return DataFrame({"a": [1, 2], "b": [3, 4]}) - - class TestDataFrameValidate: """Tests for error handling related to data types of method arguments.""" @@ -24,7 +19,8 @@ class TestDataFrameValidate: ], ) @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) - def test_validate_bool_args(self, dataframe, func, inplace): + def test_validate_bool_args(self, func, inplace): + dataframe = DataFrame({"a": [1, 2], "b": [3, 4]}) msg = 'For argument "inplace" expected type bool' kwargs = {"inplace": inplace} diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 866e9e203ffe3..f25e7d4ab8c79 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -386,18 +386,11 @@ def idfn(x): return str(x) -@pytest.fixture(params=_all_methods, ids=lambda x: idfn(x[-1])) -def ndframe_method(request): - """ - An NDFrame method returning an NDFrame. - """ - return request.param - - @pytest.mark.filterwarnings( "ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning", "ignore:last is deprecated:FutureWarning", ) +@pytest.mark.parametrize("ndframe_method", _all_methods, ids=lambda x: idfn(x[-1])) def test_finalize_called(ndframe_method): cls, init_args, method = ndframe_method ndframe = cls(*init_args) diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py index 97be46f716d7d..80c24c647bc11 100644 --- a/pandas/tests/generic/test_label_or_level_utils.py +++ b/pandas/tests/generic/test_label_or_level_utils.py @@ -34,15 +34,6 @@ def df_ambig(df): return df -@pytest.fixture -def df_duplabels(df): - """DataFrame with level 'L1' and labels 'L2', 'L3', and 'L2'""" - df = df.set_index(["L1"]) - df = pd.concat([df, df["L2"]], axis=1) - - return df - - # Test is label/level reference # ============================= def get_labels_levels(df_levels): @@ -229,7 +220,9 @@ def test_get_label_or_level_values_df_ambig(df_ambig, axis): assert_label_values(df_ambig, ["L3"], axis=axis) -def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): +def test_get_label_or_level_values_df_duplabels(df, axis): + df = df.set_index(["L1"]) + df_duplabels = pd.concat([df, df["L2"]], axis=1) axis = df_duplabels._get_axis_number(axis) # Transpose frame if axis == 1 if axis == 1: diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py index a2440e09dfc02..2e75694f7acc7 100644 --- a/pandas/tests/groupby/methods/test_describe.py +++ b/pandas/tests/groupby/methods/test_describe.py @@ -227,49 +227,34 @@ def test_describe_duplicate_columns(): tm.assert_frame_equal(result, expected) -class TestGroupByNonCythonPaths: +def test_describe_non_cython_paths(): # GH#5610 non-cython calls should not include the grouper # Tests for code not expected to go through cython paths. + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) + gb = df.groupby("A") + expected_index = Index([1, 3], name="A") + expected_col = MultiIndex( + levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], + codes=[[0] * 8, list(range(8))], + ) + expected = DataFrame( + [ + [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], + [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + ], + index=expected_index, + columns=expected_col, + ) + result = gb.describe() + tm.assert_frame_equal(result, expected) - @pytest.fixture - def df(self): - df = DataFrame( - [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], - columns=["A", "B", "C"], - ) - return df - - @pytest.fixture - def gb(self, df): - gb = df.groupby("A") - return gb - - @pytest.fixture - def gni(self, df): - gni = df.groupby("A", as_index=False) - return gni - - def test_describe(self, df, gb, gni): - # describe - expected_index = Index([1, 3], name="A") - expected_col = MultiIndex( - levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], - codes=[[0] * 8, list(range(8))], - ) - expected = DataFrame( - [ - [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], - [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], - ], - index=expected_index, - columns=expected_col, - ) - result = gb.describe() - tm.assert_frame_equal(result, expected) - - expected = expected.reset_index() - result = gni.describe() - tm.assert_frame_equal(result, expected) + gni = df.groupby("A", as_index=False) + expected = expected.reset_index() + result = gni.describe() + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", [int, float, object]) diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 2fa79c815d282..0301c51b7f538 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -423,14 +423,6 @@ def test_compound( tm.assert_frame_equal(result, expected) -@pytest.fixture -def animals_df(): - return DataFrame( - {"key": [1, 1, 1, 1], "num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, - index=["falcon", "dog", "cat", "ant"], - ) - - @pytest.mark.parametrize( "sort, ascending, normalize, name, expected_data, expected_index", [ @@ -448,10 +440,14 @@ def animals_df(): ], ) def test_data_frame_value_counts( - animals_df, sort, ascending, normalize, name, expected_data, expected_index + sort, ascending, normalize, name, expected_data, expected_index ): # 3-way compare with :meth:`~DataFrame.value_counts` # Tests from frame/methods/test_value_counts.py + animals_df = DataFrame( + {"key": [1, 1, 1, 1], "num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) result_frame = animals_df.value_counts( sort=sort, ascending=ascending, normalize=normalize ) @@ -471,19 +467,6 @@ def test_data_frame_value_counts( tm.assert_series_equal(result_frame_groupby, expected) -@pytest.fixture -def nulls_df(): - n = np.nan - return DataFrame( - { - "A": [1, 1, n, 4, n, 6, 6, 6, 6], - "B": [1, 1, 3, n, n, 6, 6, 6, 6], - "C": [1, 2, 3, 4, 5, 6, n, 8, n], - "D": [1, 2, 3, 4, 5, 6, 7, n, n], - } - ) - - @pytest.mark.parametrize( "group_dropna, count_dropna, expected_rows, expected_values", [ @@ -499,7 +482,7 @@ def nulls_df(): ], ) def test_dropna_combinations( - nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request + group_dropna, count_dropna, expected_rows, expected_values, request ): if Version(np.__version__) >= Version("1.25") and not group_dropna: request.applymarker( @@ -511,6 +494,14 @@ def test_dropna_combinations( strict=False, ) ) + nulls_df = DataFrame( + { + "A": [1, 1, np.nan, 4, np.nan, 6, 6, 6, 6], + "B": [1, 1, 3, np.nan, np.nan, 6, 6, 6, 6], + "C": [1, 2, 3, 4, 5, 6, np.nan, 8, np.nan], + "D": [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan], + } + ) gp = nulls_df.groupby(["A", "B"], dropna=group_dropna) result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) columns = DataFrame() @@ -521,17 +512,6 @@ def test_dropna_combinations( tm.assert_series_equal(result, expected) -@pytest.fixture -def names_with_nulls_df(nulls_fixture): - return DataFrame( - { - "key": [1, 1, 1, 1], - "first_name": ["John", "Anne", "John", "Beth"], - "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], - }, - ) - - @pytest.mark.parametrize( "dropna, expected_data, expected_index", [ @@ -560,11 +540,18 @@ def names_with_nulls_df(nulls_fixture): ) @pytest.mark.parametrize("normalize, name", [(False, "count"), (True, "proportion")]) def test_data_frame_value_counts_dropna( - names_with_nulls_df, dropna, normalize, name, expected_data, expected_index + nulls_fixture, dropna, normalize, name, expected_data, expected_index ): # GH 41334 # 3-way compare with :meth:`~DataFrame.value_counts` # Tests with nulls from frame/methods/test_value_counts.py + names_with_nulls_df = DataFrame( + { + "key": [1, 1, 1, 1], + "first_name": ["John", "Anne", "John", "Beth"], + "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], + }, + ) result_frame = names_with_nulls_df.value_counts(dropna=dropna, normalize=normalize) expected = Series( data=expected_data, diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py index ad35bec70f668..9560310489a54 100644 --- a/pandas/tests/groupby/test_all_methods.py +++ b/pandas/tests/groupby/test_all_methods.py @@ -18,45 +18,44 @@ import pandas._testing as tm from pandas.tests.groupby import get_groupby_method_args +# def test_multiindex_group_all_columns_when_empty(groupby_func): +# # GH 32464 +# df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) +# gb = df.groupby(["a", "b", "c"], group_keys=False) +# method = getattr(gb, groupby_func) +# args = get_groupby_method_args(groupby_func, df) -def test_multiindex_group_all_columns_when_empty(groupby_func): - # GH 32464 - df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) - gb = df.groupby(["a", "b", "c"], group_keys=False) - method = getattr(gb, groupby_func) - args = get_groupby_method_args(groupby_func, df) - - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - result = method(*args).index - expected = df.index - tm.assert_index_equal(result, expected) +# warn = FutureWarning if groupby_func == "fillna" else None +# warn_msg = "DataFrameGroupBy.fillna is deprecated" +# with tm.assert_produces_warning(warn, match=warn_msg): +# result = method(*args).index +# expected = df.index +# tm.assert_index_equal(result, expected) -def test_duplicate_columns(request, groupby_func, as_index): - # GH#50806 - if groupby_func == "corrwith": - msg = "GH#50845 - corrwith fails when there are duplicate columns" - request.applymarker(pytest.mark.xfail(reason=msg)) - df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb")) - args = get_groupby_method_args(groupby_func, df) - gb = df.groupby("a", as_index=as_index) - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - result = getattr(gb, groupby_func)(*args) +# def test_duplicate_columns(request, groupby_func, as_index): +# # GH#50806 +# if groupby_func == "corrwith": +# msg = "GH#50845 - corrwith fails when there are duplicate columns" +# request.applymarker(pytest.mark.xfail(reason=msg)) +# df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb")) +# args = get_groupby_method_args(groupby_func, df) +# gb = df.groupby("a", as_index=as_index) +# warn = FutureWarning if groupby_func == "fillna" else None +# warn_msg = "DataFrameGroupBy.fillna is deprecated" +# with tm.assert_produces_warning(warn, match=warn_msg): +# result = getattr(gb, groupby_func)(*args) - expected_df = df.set_axis(["a", "b", "c"], axis=1) - expected_args = get_groupby_method_args(groupby_func, expected_df) - expected_gb = expected_df.groupby("a", as_index=as_index) - warn = FutureWarning if groupby_func == "fillna" else None - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=warn_msg): - expected = getattr(expected_gb, groupby_func)(*expected_args) - if groupby_func not in ("size", "ngroup", "cumcount"): - expected = expected.rename(columns={"c": "b"}) - tm.assert_equal(result, expected) +# expected_df = df.set_axis(["a", "b", "c"], axis=1) +# expected_args = get_groupby_method_args(groupby_func, expected_df) +# expected_gb = expected_df.groupby("a", as_index=as_index) +# warn = FutureWarning if groupby_func == "fillna" else None +# warn_msg = "DataFrameGroupBy.fillna is deprecated" +# with tm.assert_produces_warning(warn, match=warn_msg): +# expected = getattr(expected_gb, groupby_func)(*expected_args) +# if groupby_func not in ("size", "ngroup", "cumcount"): +# expected = expected.rename(columns={"c": "b"}) +# tm.assert_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index 4aaf3de9a23b2..6d0e07b19b665 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -5,38 +5,6 @@ import pandas._testing as tm -@pytest.fixture(params=[["inner"], ["inner", "outer"]]) -def frame(request): - levels = request.param - df = pd.DataFrame( - { - "outer": ["a", "a", "a", "b", "b", "b"], - "inner": [1, 2, 3, 1, 2, 3], - "A": np.arange(6), - "B": ["one", "one", "two", "two", "one", "one"], - } - ) - if levels: - df = df.set_index(levels) - - return df - - -@pytest.fixture() -def series(): - df = pd.DataFrame( - { - "outer": ["a", "a", "a", "b", "b", "b"], - "inner": [1, 2, 3, 1, 2, 3], - "A": np.arange(6), - "B": ["one", "one", "two", "two", "one", "one"], - } - ) - s = df.set_index(["outer", "inner", "B"])["A"] - - return s - - @pytest.mark.parametrize( "key_strs,groupers", [ @@ -46,7 +14,17 @@ def series(): (["inner", "B"], [pd.Grouper(level="inner"), "B"]), # Index and column ], ) -def test_grouper_index_level_as_string(frame, key_strs, groupers): +@pytest.mark.parametrize("levels", [["inner"], ["inner", "outer"]]) +def test_grouper_index_level_as_string(levels, key_strs, groupers): + frame = pd.DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 3, 1, 2, 3], + "A": np.arange(6), + "B": ["one", "one", "two", "two", "one", "one"], + } + ) + frame = frame.set_index(levels) if "B" not in key_strs or "outer" in frame.columns: result = frame.groupby(key_strs).mean(numeric_only=True) expected = frame.groupby(groupers).mean(numeric_only=True) @@ -73,6 +51,15 @@ def test_grouper_index_level_as_string(frame, key_strs, groupers): ) def test_grouper_index_level_as_string_series(series, levels): # Compute expected result + df = pd.DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 3, 1, 2, 3], + "A": np.arange(6), + "B": ["one", "one", "two", "two", "one", "one"], + } + ) + series = df.set_index(["outer", "inner", "B"])["A"] if isinstance(levels, list): groupers = [pd.Grouper(level=lv) for lv in levels] else: diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index 664c52babac13..4974e13a1b6ea 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -118,15 +118,26 @@ def test_doc_examples(): tm.assert_frame_equal(result, expected) -@pytest.fixture() -def multiindex_data(): +def test_multiindex(): + # Test the multiindex mentioned as the use-case in the documentation + + def _make_df_from_data(data): + rows = {} + for date in data: + for level in data[date]: + rows[(date, level[0])] = {"A": level[1], "B": level[2]} + + df = pd.DataFrame.from_dict(rows, orient="index") + df.index.names = ("Date", "Item") + return df + rng = np.random.default_rng(2) ndates = 100 nitems = 20 dates = pd.date_range("20130101", periods=ndates, freq="D") items = [f"item {i}" for i in range(nitems)] - data = {} + multiindex_data = {} for date in dates: nitems_for_date = nitems - rng.integers(0, 12) levels = [ @@ -134,24 +145,8 @@ def multiindex_data(): for item in items[:nitems_for_date] ] levels.sort(key=lambda x: x[1]) - data[date] = levels - - return data - + multiindex_data[date] = levels -def _make_df_from_data(data): - rows = {} - for date in data: - for level in data[date]: - rows[(date, level[0])] = {"A": level[1], "B": level[2]} - - df = pd.DataFrame.from_dict(rows, orient="index") - df.index.names = ("Date", "Item") - return df - - -def test_multiindex(multiindex_data): - # Test the multiindex mentioned as the use-case in the documentation df = _make_df_from_data(multiindex_data) result = df.groupby("Date", as_index=False).nth(slice(3, -3)) @@ -271,15 +266,11 @@ def test_step(step): tm.assert_frame_equal(result, expected) -@pytest.fixture() -def column_group_df(): - return pd.DataFrame( +def test_column_axis(): + column_group_df = pd.DataFrame( [[0, 1, 2, 3, 4, 5, 6], [0, 0, 1, 0, 1, 0, 2]], columns=["A", "B", "C", "D", "E", "F", "G"], ) - - -def test_column_axis(column_group_df): msg = "DataFrame.groupby with axis=1" with tm.assert_produces_warning(FutureWarning, match=msg): g = column_group_df.groupby(column_group_df.iloc[1], axis=1) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 0b451ce73db89..738711019b5fd 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -67,19 +67,6 @@ def df_with_datetime_col(): return df -@pytest.fixture -def df_with_timedelta_col(): - df = DataFrame( - { - "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], - "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], - "c": range(9), - "d": datetime.timedelta(days=1), - } - ) - return df - - @pytest.fixture def df_with_cat_col(): df = DataFrame( @@ -353,8 +340,15 @@ def test_groupby_raises_datetime_np( @pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "var"]) -def test_groupby_raises_timedelta(func, df_with_timedelta_col): - df = df_with_timedelta_col +def test_groupby_raises_timedelta(func): + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": datetime.timedelta(days=1), + } + ) gb = df.groupby(by="a") _call_and_check( diff --git a/pandas/tests/indexes/datetimelike_/test_nat.py b/pandas/tests/indexes/datetimelike_/test_nat.py index 50cf29d016355..47a5e4638e69d 100644 --- a/pandas/tests/indexes/datetimelike_/test_nat.py +++ b/pandas/tests/indexes/datetimelike_/test_nat.py @@ -10,44 +10,33 @@ import pandas._testing as tm -class NATests: - def test_nat(self, index_without_na): - empty_index = index_without_na[:0] - - index_with_na = index_without_na.copy(deep=True) - index_with_na._data[1] = NaT - - assert empty_index._na_value is NaT - assert index_with_na._na_value is NaT - assert index_without_na._na_value is NaT - - idx = index_without_na - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - assert idx.hasnans is False - - idx = index_with_na - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - assert idx.hasnans is True +@pytest.mark.parameterize( + "index_without_na", + [ + TimedeltaIndex(["1 days", "2 days"]), + PeriodIndex(["2011-01-01", "2011-01-02"], freq="D"), + DatetimeIndex(["2011-01-01", "2011-01-02"]), + DatetimeIndex(["2011-01-01", "2011-01-02"], tz="UTC"), + ], +) +def test_nat(self, index_without_na): + empty_index = index_without_na[:0] + index_with_na = index_without_na.copy(deep=True) + index_with_na._data[1] = NaT -class TestDatetimeIndexNA(NATests): - @pytest.fixture - def index_without_na(self, tz_naive_fixture): - tz = tz_naive_fixture - return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + assert empty_index._na_value is NaT + assert index_with_na._na_value is NaT + assert index_without_na._na_value is NaT + idx = index_without_na + assert idx._can_hold_na -class TestTimedeltaIndexNA(NATests): - @pytest.fixture - def index_without_na(self): - return TimedeltaIndex(["1 days", "2 days"]) + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert idx.hasnans is False + idx = index_with_na + assert idx._can_hold_na -class TestPeriodIndexNA(NATests): - @pytest.fixture - def index_without_na(self): - return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans is True diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 778c07b46e57c..c4bfd9c8e0c1d 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -23,11 +23,6 @@ import pandas.core.common as com -@pytest.fixture(params=[None, "foo"]) -def name(request): - return request.param - - class ConstructorTests: """ Common tests for all variations of IntervalIndex construction. Input data @@ -35,8 +30,9 @@ class ConstructorTests: get_kwargs_from_breaks to the expected format. """ - @pytest.fixture( - params=[ + @pytest.mark.parameterize( + "breaks_and_expected_subtype", + [ ([3, 14, 15, 92, 653], np.int64), (np.arange(10, dtype="int64"), np.int64), (Index(np.arange(-10, 11, dtype=np.int64)), np.int64), @@ -48,11 +44,9 @@ class ConstructorTests: "datetime64[ns, US/Eastern]", ), (timedelta_range("1 day", periods=10), " Date: Sat, 23 Dec 2023 21:28:11 -0800 Subject: [PATCH 04/12] Revert "Use less idx fixture in multi" This reverts commit e4f81bb3aff832bc1f23a109624b792e5f08f6e2. --- pandas/tests/indexes/multi/test_analytics.py | 17 ++++----- pandas/tests/indexes/multi/test_astype.py | 7 ++-- pandas/tests/indexes/multi/test_compat.py | 6 ++-- .../tests/indexes/multi/test_constructors.py | 3 +- pandas/tests/indexes/multi/test_conversion.py | 3 +- pandas/tests/indexes/multi/test_copy.py | 9 ++--- pandas/tests/indexes/multi/test_duplicates.py | 10 ++---- .../tests/indexes/multi/test_equivalence.py | 21 +++++------ pandas/tests/indexes/multi/test_formats.py | 6 ++-- pandas/tests/indexes/multi/test_get_set.py | 3 +- pandas/tests/indexes/multi/test_integrity.py | 17 ++++----- pandas/tests/indexes/multi/test_join.py | 3 +- pandas/tests/indexes/multi/test_missing.py | 10 +++--- pandas/tests/indexes/multi/test_reindex.py | 10 ++---- pandas/tests/indexes/multi/test_reshape.py | 18 +++------- pandas/tests/indexes/multi/test_setops.py | 36 ++++++------------- pandas/tests/indexes/multi/test_sorting.py | 31 ++++++++++++---- pandas/tests/indexes/multi/test_take.py | 6 +--- 18 files changed, 83 insertions(+), 133 deletions(-) diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index a0e7510fbe2dc..87f1439db5fc8 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -11,15 +11,13 @@ import pandas._testing as tm -def test_infer_objects(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_infer_objects(idx): with pytest.raises(NotImplementedError, match="to_frame"): idx.infer_objects() -def test_shift(): +def test_shift(idx): # GH8083 test the base class for shift - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = ( "This method is only implemented for DatetimeIndex, PeriodIndex and " "TimedeltaIndex; Got type MultiIndex" @@ -78,9 +76,8 @@ def test_truncate_multiindex(): # TODO: reshape -def test_reorder_levels(): +def test_reorder_levels(idx): # this blows up - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) with pytest.raises(IndexError, match="^Too many levels"): idx.reorder_levels([2, 1, 0]) @@ -177,9 +174,9 @@ def test_sub(idx): first.tolist() - idx[-3:] -def test_map(): +def test_map(idx): # callable - index = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) + index = idx result = index.map(lambda x: x) tm.assert_index_equal(result, index) @@ -238,11 +235,10 @@ def test_map_dictlike(idx, mapper): ], ids=lambda func: func.__name__, ) -def test_numpy_ufuncs(func): +def test_numpy_ufuncs(idx, func): # test ufuncs of numpy. see: # https://numpy.org/doc/stable/reference/ufuncs.html - idx = MultiIndex(levels=[["A", "B"]], codes=[[0, 1]]) expected_exception = TypeError msg = ( "loop of ufunc does not support argument 0 of type tuple which " @@ -258,7 +254,6 @@ def test_numpy_ufuncs(func): ids=lambda func: func.__name__, ) def test_numpy_type_funcs(idx, func): - idx = MultiIndex(levels=[["A", "B"]], codes=[[0, 1]]) msg = ( f"ufunc '{func.__name__}' not supported for the input types, and the inputs " "could not be safely coerced to any supported types according to " diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 1f9f8d91ad970..29908537fbe59 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -3,12 +3,10 @@ from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import MultiIndex import pandas._testing as tm -def test_astype(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["foo"]) +def test_astype(idx): expected = idx.copy() actual = idx.astype("O") tm.assert_copy(actual.levels, expected.levels) @@ -20,8 +18,7 @@ def test_astype(): @pytest.mark.parametrize("ordered", [True, False]) -def test_astype_category(ordered): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_astype_category(idx, ordered): # GH 18630 msg = "> 1 ndim Categorical are not supported at this time" with pytest.raises(NotImplementedError, match=msg): diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 4bc73f8272d08..27a8c6e9b7158 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -6,8 +6,7 @@ import pandas._testing as tm -def test_numeric_compat(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_numeric_compat(idx): with pytest.raises(TypeError, match="cannot perform __mul__"): idx * 1 @@ -30,8 +29,7 @@ def test_numeric_compat(): @pytest.mark.parametrize("method", ["all", "any", "__invert__"]) -def test_logical_compat(method): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_logical_compat(idx, method): msg = f"cannot perform {method}" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 0ad8831805087..8456e6a7acba5 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -385,8 +385,7 @@ def test_from_tuples_empty(): tm.assert_index_equal(result, expected) -def test_from_tuples_index_values(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_from_tuples_index_values(idx): result = MultiIndex.from_tuples(idx) assert (result.values == idx.values).all() diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 0109d67cb7dde..3c2ca045d6f99 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -9,8 +9,7 @@ import pandas._testing as tm -def test_to_numpy(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_to_numpy(idx): result = idx.to_numpy() exp = idx.values tm.assert_numpy_array_equal(result, exp) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 504496ea527cc..2e09a580f9528 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -26,22 +26,19 @@ def assert_multiindex_copied(copy, original): assert copy.sortorder == original.sortorder -def test_copy(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_copy(idx): i_copy = idx.copy() assert_multiindex_copied(i_copy, idx) -def test_shallow_copy(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_shallow_copy(idx): i_copy = idx._view() assert_multiindex_copied(i_copy, idx) -def test_view(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_view(idx): i_view = idx.view() assert_multiindex_copied(i_view, idx) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index faf83407cfa50..6c6d9022b1af3 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -145,19 +145,13 @@ def test_duplicate_meta_data(): assert idx.drop_duplicates().names == idx.names -def test_has_duplicates(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_has_duplicates(idx, idx_dup): + # see fixtures assert idx.is_unique is True assert idx.has_duplicates is False - - -def test_has_duplicates_with_dups(idx_dup): - # see fixtures assert idx_dup.is_unique is False assert idx_dup.has_duplicates is True - -def test_has_duplicates_other(): mi = MultiIndex( levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] ) diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 079f6194c2ff9..9babbd5b8d56d 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -12,8 +12,7 @@ import pandas._testing as tm -def test_equals(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_equals(idx): assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.astype(object)) @@ -27,6 +26,10 @@ def test_equals(): assert idx.equals(same_values) assert same_values.equals(idx) + if idx.nlevels == 1: + # do not test MultiIndex + assert not idx.equals(Series(idx)) + def test_equals_op(idx): # GH9947, GH10637 @@ -129,8 +132,7 @@ def test_compare_tuple_strs(): tm.assert_numpy_array_equal(result, expected) -def test_equals_multi(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_equals_multi(idx): assert idx.equals(idx) assert not idx.equals(idx.values) assert idx.equals(Index(idx.values)) @@ -139,8 +141,6 @@ def test_equals_multi(): assert not idx.equals(idx[:-1]) assert not idx.equals(idx[-1]) - -def test_equals_multi_different_levels(idx): # different number of levels index = MultiIndex( levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))], @@ -181,8 +181,7 @@ def test_equals_multi_different_levels(idx): assert not idx.equals(index) -def test_identical(): - idx = MultiIndex(levels=[[0, 1], [2, 3]], codes=[[0, 1], [0, 1]]) +def test_identical(idx): mi = idx.copy() mi2 = idx.copy() assert mi.identical(mi2) @@ -250,14 +249,12 @@ def test_is_(): assert not mi5.is_(mi) -def test_is_all_dates(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_is_all_dates(idx): assert not idx._is_all_dates -def test_is_numeric(): +def test_is_numeric(idx): # MultiIndex is never numeric - idx = MultiIndex(levels=[["A", "B"]], codes=[[0, 1]]) assert not is_any_real_numeric_dtype(idx) diff --git a/pandas/tests/indexes/multi/test_formats.py b/pandas/tests/indexes/multi/test_formats.py index b3988e824775b..52ff3109128f2 100644 --- a/pandas/tests/indexes/multi/test_formats.py +++ b/pandas/tests/indexes/multi/test_formats.py @@ -9,9 +9,8 @@ import pandas._testing as tm -def test_format(): +def test_format(idx): msg = "MultiIndex.format is deprecated" - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) with tm.assert_produces_warning(FutureWarning, match=msg): idx.format() idx[:0].format() @@ -71,9 +70,8 @@ def test_unicode_string_with_unicode(): str(idx) -def test_repr_max_seq_item_setting(): +def test_repr_max_seq_item_setting(idx): # GH10182 - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) idx = idx.repeat(50) with pd.option_context("display.max_seq_items", None): repr(idx) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 969cbf6fb8cd9..6eeaeb6711d03 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -242,8 +242,7 @@ def test_set_codes(idx): assert result.equals(expected) -def test_set_levels_codes_names_bad_input(): - idx = MultiIndex(levels=[["A", "B"], ["B", "C"]], codes=[[0, 1], [0, 1]]) +def test_set_levels_codes_names_bad_input(idx): levels, codes = idx.levels, idx.codes names = idx.names diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 93262da50b0f6..d956747cbc859 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -163,11 +163,10 @@ def test_take_invalid_kwargs(): idx.take(indices, mode="clip") -def test_isna_behavior(): +def test_isna_behavior(idx): # should not segfault GH5123 # NOTE: if MI representation changes, may make sense to allow # isna(MI) - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "isna is not defined for MultiIndex" with pytest.raises(NotImplementedError, match=msg): pd.isna(idx) @@ -209,14 +208,12 @@ def test_mi_hashtable_populated_attribute_error(monkeypatch): df["a"].foo() -def test_can_hold_identifiers(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_can_hold_identifiers(idx): key = idx[0] assert idx._can_hold_identifiers_and_holds_name(key) is True -def test_metadata_immutable(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_metadata_immutable(idx): levels, codes = idx.levels, idx.codes # shouldn't be able to set at either the top level or base level mutable_regex = re.compile("does not support mutable operations") @@ -268,8 +265,7 @@ def test_rangeindex_fallback_coercion_bug(): tm.assert_index_equal(result, expected) -def test_memory_usage(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_memory_usage(idx): result = idx.memory_usage() if len(idx): idx.get_loc(idx[0]) @@ -289,6 +285,5 @@ def test_memory_usage(): assert result == 0 -def test_nlevels(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) - assert idx.nlevels == 1 +def test_nlevels(idx): + assert idx.nlevels == 2 diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 1b88ef790723c..edd0feaaa1159 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -50,8 +50,7 @@ def test_join_level_corner_case(idx): idx.join(idx, level=1) -def test_join_self(join_type): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_join_self(idx, join_type): result = idx.join(idx, how=join_type) expected = idx if join_type == "outer": diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 5e3c545f6a35f..14ffc42fb4b59 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -6,9 +6,8 @@ import pandas._testing as tm -def test_fillna(): +def test_fillna(idx): # GH 11343 - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "isna is not defined for MultiIndex" with pytest.raises(NotImplementedError, match=msg): idx.fillna(idx[0]) @@ -54,19 +53,18 @@ def test_dropna(): tm.assert_index_equal(idx.dropna(how="all"), expected) -def test_nulls(): +def test_nulls(idx): # this is really a smoke test for the methods # as these are adequately tested for function elsewhere - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) + msg = "isna is not defined for MultiIndex" with pytest.raises(NotImplementedError, match=msg): idx.isna() @pytest.mark.xfail(reason="isna is not defined for MultiIndex") -def test_hasnans_isnans(): +def test_hasnans_isnans(idx): # GH 11343, added tests for hasnans / isnans - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) index = idx.copy() # cases in indices doesn't include NaN diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 007480db1de1b..d1b4fe8b98760 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -9,12 +9,7 @@ import pandas._testing as tm -def test_reindex(): - idx = MultiIndex( - levels=[list(range(5)), list(range(1, 6))], - codes=[list(range(5)), list(range(5))], - names=["first", "second"], - ) +def test_reindex(idx): result, indexer = idx.reindex(list(idx[:4])) assert isinstance(result, MultiIndex) assert result.names == ["first", "second"] @@ -97,8 +92,7 @@ def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array( assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype -def test_reindex_base(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_reindex_base(idx): expected = np.arange(idx.size, dtype=np.intp) actual = idx.get_indexer(idx) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 13d5bd1ae8d65..06dbb33aadf97 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -12,12 +12,7 @@ import pandas._testing as tm -def test_insert(): - idx = MultiIndex( - levels=[["bar", "foo"], ["two", "one"]], - codes=[[0, 1], [0, 1]], - names=["first", "second"], - ) +def test_insert(idx): # key contained in all levels new_index = idx.insert(0, ("bar", "two")) assert new_index.equal_levels(idx) @@ -39,8 +34,6 @@ def test_insert(): with pytest.raises(ValueError, match=msg): idx.insert(0, ("foo2",)) - -def test_insert_reindex(): left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"]) left.set_index(["1st", "2nd"], inplace=True) ts = left["3rd"].copy(deep=True) @@ -97,8 +90,7 @@ def test_insert2(): tm.assert_series_equal(left, right) -def test_append(): - idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) +def test_append(idx): result = idx[:3].append(idx[3:]) assert result.equals(idx) @@ -209,16 +201,14 @@ def test_repeat(): tm.assert_index_equal(m.repeat(reps), expected) -def test_insert_base(): - idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) +def test_insert_base(idx): result = idx[1:4] # test 0th element assert idx[0:4].equals(result.insert(0, idx[0])) -def test_delete_base(): - idx = MultiIndex(levels=[list(range(6))], codes=[list(range(6))]) +def test_delete_base(idx): expected = idx[1:] result = idx.delete(0) assert result.equals(expected) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 025381616743a..0abb56ecf9de7 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -21,17 +21,15 @@ @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] ) -def test_set_ops_error_cases(case, sort, method): +def test_set_ops_error_cases(idx, case, sort, method): # non-iterable input - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "Input must be Index or array-like" with pytest.raises(TypeError, match=msg): getattr(idx, method)(case, sort=sort) @pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) -def test_intersection_base(sort, klass): - idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) +def test_intersection_base(idx, sort, klass): first = idx[2::-1] # first 3 elements reversed second = idx[:5] @@ -52,8 +50,7 @@ def test_intersection_base(sort, klass): @pytest.mark.arm_slow @pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) -def test_union_base(sort, klass): - idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) +def test_union_base(idx, sort, klass): first = idx[::-1] second = idx[:5] @@ -72,8 +69,7 @@ def test_union_base(sort, klass): first.union([1, 2, 3], sort=sort) -def test_difference_base(sort): - idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) +def test_difference_base(idx, sort): second = idx[4:] answer = idx[:4] result = idx.difference(second, sort=sort) @@ -95,8 +91,7 @@ def test_difference_base(sort): idx.difference([1, 2, 3], sort=sort) -def test_symmetric_difference(sort): - idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) +def test_symmetric_difference(idx, sort): first = idx[1:] second = idx[:-1] answer = idx[[-1, 0]] @@ -129,18 +124,13 @@ def test_multiindex_symmetric_difference(): assert result.names == [None, None] -def test_empty(): +def test_empty(idx): # GH 15270 - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) assert not idx.empty assert idx[:0].empty -def test_difference(sort): - idx = MultiIndex( - levels=[list(range(5)), list(range(1, 6))], - codes=[list(range(5)), list(range(5))], - ) +def test_difference(idx, sort): first = idx result = first.difference(idx[-3:], sort=sort) vals = idx[:-3].values @@ -247,8 +237,7 @@ def test_difference_sort_incomparable_true(): idx.difference(other, sort=True) -def test_union(sort): - idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) +def test_union(idx, sort): piece1 = idx[:5][::-1] piece2 = idx[3:] @@ -293,8 +282,7 @@ def test_union_with_regular_index(idx, using_infer_string): assert not result.equals(result2) -def test_intersection(sort): - idx = MultiIndex(levels=[list(range(5))], codes=[list(range(5))]) +def test_intersection(idx, sort): piece1 = idx[:5][::-1] piece2 = idx[3:] @@ -322,8 +310,7 @@ def test_intersection(sort): @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] ) -def test_setop_with_categorical(sort, method): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_setop_with_categorical(idx, sort, method): other = idx.to_flat_index().astype("category") res_names = [None] * idx.nlevels @@ -336,8 +323,7 @@ def test_setop_with_categorical(sort, method): tm.assert_index_equal(result, expected) -def test_intersection_non_object(sort): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_intersection_non_object(idx, sort): other = Index(range(3), name="foo") result = idx.intersection(other, sort=sort) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 4748016d2943e..b4dcef71dcf50 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -7,9 +7,11 @@ ) from pandas import ( + CategoricalIndex, DataFrame, Index, MultiIndex, + RangeIndex, Series, Timestamp, ) @@ -17,8 +19,7 @@ from pandas.core.indexes.frozen import FrozenList -def test_sortlevel(): - idx = MultiIndex(levels=[[0, 1], [1, 2]], codes=[[0, 1], [0, 1]]) +def test_sortlevel(idx): tuples = list(idx) np.random.default_rng(2).shuffle(tuples) @@ -82,12 +83,31 @@ def test_sortlevel_na_position(): tm.assert_index_equal(result, expected) -def test_numpy_argsort(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_numpy_argsort(idx): result = np.argsort(idx) expected = idx.argsort() tm.assert_numpy_array_equal(result, expected) + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(type(idx), (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, axis=1) + + msg = "the 'kind' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, kind="mergesort") + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, order=("a", "b")) + def test_unsortedindex(): # GH 11897 @@ -257,8 +277,7 @@ def test_remove_unused_nan(level0, level1): assert "unused" not in result.levels[level] -def test_argsort(): - idx = MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_argsort(idx): result = idx.argsort() expected = idx.values.argsort() tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_take.py b/pandas/tests/indexes/multi/test_take.py index 6ae927137a304..543cba25c373b 100644 --- a/pandas/tests/indexes/multi/test_take.py +++ b/pandas/tests/indexes/multi/test_take.py @@ -11,17 +11,13 @@ def test_take(idx): expected = idx[indexer] assert result.equals(expected) - -def test_freq(): # GH 10791 - idx = pd.MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) msg = "'MultiIndex' object has no attribute 'freq'" with pytest.raises(AttributeError, match=msg): idx.freq -def test_take_invalid_kwargs(): - idx = pd.MultiIndex(levels=[[0, 1]], codes=[[0, 1]]) +def test_take_invalid_kwargs(idx): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" From e9a97c220caf8d2b3f6d8e01a44fd8ebbcab5476 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Dec 2023 12:21:53 -0800 Subject: [PATCH 05/12] Remove more single use fixtures --- pandas/conftest.py | 10 - .../tests/indexes/interval/test_interval.py | 6 +- pandas/tests/io/formats/test_format.py | 73 +- .../json/test_json_table_schema_ext_dtype.py | 22 +- pandas/tests/io/json/test_normalize.py | 47 +- pandas/tests/io/json/test_pandas.py | 21 +- pandas/tests/io/sas/test_xport.py | 46 +- pandas/tests/reshape/merge/test_merge.py | 120 +-- pandas/tests/reshape/merge/test_merge_asof.py | 962 +++++++++--------- pandas/tests/reshape/test_pivot.py | 13 +- .../series/methods/test_convert_dtypes.py | 339 +++--- pandas/tests/series/methods/test_nlargest.py | 97 +- pandas/tests/tools/test_to_datetime.py | 48 +- pandas/tests/tseries/offsets/test_common.py | 31 +- .../offsets/test_custom_business_month.py | 56 +- .../util/test_assert_produces_warning.py | 32 +- 16 files changed, 854 insertions(+), 1069 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 983272d79081e..b4aaaddee75df 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1777,16 +1777,6 @@ def ip(): return InteractiveShell(config=c) -@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) -def spmatrix(request): - """ - Yields scipy sparse matrix classes. - """ - sparse = pytest.importorskip("scipy.sparse") - - return getattr(sparse, request.param + "_matrix") - - @pytest.fixture( params=[ getattr(pd.offsets, o) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 63f9b2176dddd..cd0bac1a7945a 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -21,11 +21,6 @@ import pandas.core.common as com -@pytest.fixture(params=[None, "foo"]) -def name(request): - return request.param - - class TestIntervalIndex: index = IntervalIndex.from_arrays([0, 1], [1, 2]) @@ -865,6 +860,7 @@ def test_nbytes(self): expected = 64 # 4 * 8 * 2 assert result == expected + @pytest.mark.parametrize("name", [None, "foo"]) @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) def test_set_closed(self, name, closed, new_closed): # GH 21670 diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 0ca29c219b55b..43e94b8c55589 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -4,7 +4,6 @@ """ from datetime import datetime from io import StringIO -from pathlib import Path import re from shutil import get_terminal_size @@ -32,55 +31,6 @@ import pandas.io.formats.format as fmt -@pytest.fixture(params=["string", "pathlike", "buffer"]) -def filepath_or_buffer_id(request): - """ - A fixture yielding test ids for filepath_or_buffer testing. - """ - return request.param - - -@pytest.fixture -def filepath_or_buffer(filepath_or_buffer_id, tmp_path): - """ - A fixture yielding a string representing a filepath, a path-like object - and a StringIO buffer. Also checks that buffer is not closed. - """ - if filepath_or_buffer_id == "buffer": - buf = StringIO() - yield buf - assert not buf.closed - else: - assert isinstance(tmp_path, Path) - if filepath_or_buffer_id == "pathlike": - yield tmp_path / "foo" - else: - yield str(tmp_path / "foo") - - -@pytest.fixture -def assert_filepath_or_buffer_equals( - filepath_or_buffer, filepath_or_buffer_id, encoding -): - """ - Assertion helper for checking filepath_or_buffer. - """ - if encoding is None: - encoding = "utf-8" - - def _assert_filepath_or_buffer_equals(expected): - if filepath_or_buffer_id == "string": - with open(filepath_or_buffer, encoding=encoding) as f: - result = f.read() - elif filepath_or_buffer_id == "pathlike": - result = filepath_or_buffer.read_text(encoding=encoding) - elif filepath_or_buffer_id == "buffer": - result = filepath_or_buffer.getvalue() - assert result == expected - - return _assert_filepath_or_buffer_equals - - def has_info_repr(df): r = repr(df) c1 = r.split("\n")[0].startswith("" - assert repr(offset2) == "<2 * CustomBusinessMonthBegins>" + def test_repr(self): + assert repr(CBMonthBegin()) == "" + assert repr(CBMonthBegin(2)) == "<2 * CustomBusinessMonthBegins>" - def test_add_datetime(self, dt, offset2): - assert offset2 + dt == datetime(2008, 3, 3) + def test_add_datetime(self, dt): + assert CBMonthBegin(2) + dt == datetime(2008, 3, 3) def testRollback1(self): assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) @@ -252,30 +240,18 @@ def test_apply_with_extra_offset(self, case): class TestCustomBusinessMonthEnd: - @pytest.fixture - def _offset(self): - return CBMonthEnd - - @pytest.fixture - def offset(self): - return CBMonthEnd() - - @pytest.fixture - def offset2(self): - return CBMonthEnd(2) - - def test_different_normalize_equals(self, _offset): + def test_different_normalize_equals(self): # GH#21404 changed __eq__ to return False when `normalize` does not match - offset = _offset() - offset2 = _offset(normalize=True) + offset = CBMonthEnd() + offset2 = CBMonthEnd(normalize=True) assert offset != offset2 - def test_repr(self, offset, offset2): - assert repr(offset) == "" - assert repr(offset2) == "<2 * CustomBusinessMonthEnds>" + def test_repr(self): + assert repr(CBMonthEnd()) == "" + assert repr(CBMonthEnd(2)) == "<2 * CustomBusinessMonthEnds>" - def test_add_datetime(self, dt, offset2): - assert offset2 + dt == datetime(2008, 2, 29) + def test_add_datetime(self, dt): + assert CBMonthEnd(2) + dt == datetime(2008, 2, 29) def testRollback1(self): assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) diff --git a/pandas/tests/util/test_assert_produces_warning.py b/pandas/tests/util/test_assert_produces_warning.py index 5c27a3ee79d4a..88e9f0d8fccee 100644 --- a/pandas/tests/util/test_assert_produces_warning.py +++ b/pandas/tests/util/test_assert_produces_warning.py @@ -13,26 +13,6 @@ import pandas._testing as tm -@pytest.fixture( - params=[ - RuntimeWarning, - ResourceWarning, - UserWarning, - FutureWarning, - DeprecationWarning, - PerformanceWarning, - DtypeWarning, - ], -) -def category(request): - """ - Return unique warning. - - Useful for testing behavior of tm.assert_produces_warning with various categories. - """ - return request.param - - @pytest.fixture( params=[ (RuntimeWarning, UserWarning), @@ -73,6 +53,18 @@ def test_assert_produces_warning_honors_filter(): f() +@pytest.mark.parametrize( + "category", + [ + RuntimeWarning, + ResourceWarning, + UserWarning, + FutureWarning, + DeprecationWarning, + PerformanceWarning, + DtypeWarning, + ], +) @pytest.mark.parametrize( "message, match", [ From c7f7813e814ddf49a0d7e8b36d5b0a7089d13368 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Dec 2023 17:05:59 -0800 Subject: [PATCH 06/12] Fix typos --- pandas/tests/indexes/datetimelike_/test_nat.py | 2 +- pandas/tests/indexes/interval/test_constructors.py | 6 +++--- pandas/tests/indexes/interval/test_interval_tree.py | 2 +- pandas/tests/indexes/numeric/test_numeric.py | 2 +- pandas/tests/io/test_orc.py | 2 +- pandas/tests/tools/test_to_datetime.py | 2 +- pandas/tests/tseries/offsets/test_common.py | 3 +-- 7 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/tests/indexes/datetimelike_/test_nat.py b/pandas/tests/indexes/datetimelike_/test_nat.py index 47a5e4638e69d..1a667536b9266 100644 --- a/pandas/tests/indexes/datetimelike_/test_nat.py +++ b/pandas/tests/indexes/datetimelike_/test_nat.py @@ -10,7 +10,7 @@ import pandas._testing as tm -@pytest.mark.parameterize( +@pytest.mark.parametrize( "index_without_na", [ TimedeltaIndex(["1 days", "2 days"]), diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index c4bfd9c8e0c1d..e9864723f026e 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -30,7 +30,7 @@ class ConstructorTests: get_kwargs_from_breaks to the expected format. """ - @pytest.mark.parameterize( + @pytest.mark.parametrize( "breaks_and_expected_subtype", [ ([3, 14, 15, 92, 653], np.int64), @@ -46,7 +46,7 @@ class ConstructorTests: (timedelta_range("1 day", periods=10), " Date: Mon, 25 Dec 2023 17:34:51 -0800 Subject: [PATCH 07/12] Fix more typos --- pandas/tests/frame/methods/test_join.py | 4 ++-- pandas/tests/frame/methods/test_nlargest.py | 2 +- pandas/tests/frame/test_subclass.py | 2 +- pandas/tests/groupby/test_index_as_string.py | 2 +- pandas/tests/indexes/datetimelike_/test_nat.py | 2 +- pandas/tests/indexes/numeric/test_indexing.py | 2 +- pandas/tests/indexes/numeric/test_numeric.py | 3 ++- pandas/tests/io/json/test_normalize.py | 2 +- pandas/tests/test_downstream.py | 5 ++++- 9 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 06c0f6ea3c5c0..6d181bb13786a 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -93,7 +93,7 @@ def right_w_dups(right_no_dup): ), ], ) -def test_join(left, right, how, sort, expected): +def test_join(how, sort, expected): left = DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0]) right = DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) result = left.join(right, how=how, sort=sort, validate="1:1") @@ -330,7 +330,7 @@ def test_join_overlap(float_frame): tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) -def test_join_period_index(frame_with_period_index): +def test_join_period_index(): frame_with_period_index = DataFrame( data=np.arange(20).reshape(4, 5), columns=list("abcde"), diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 22227bbb62d39..8a7b985c98069 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -62,7 +62,7 @@ class TestNLargestNSmallest: ], ) @pytest.mark.parametrize("n", range(1, 11)) - def test_nlargest_n(self, df_strings, nselect_method, n, order): + def test_nlargest_n(self, nselect_method, n, order): # GH#10393 df = pd.DataFrame( { diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index cf4f2071a8aae..91f5de8e7d7f3 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -700,7 +700,7 @@ def test_idxmax_preserves_subclass(self): result = df.idxmax() assert isinstance(result, tm.SubclassedSeries) - def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df): + def test_convert_dtypes_preserves_subclass(self): # GH 43668 df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) result = df.convert_dtypes() diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index 6d0e07b19b665..743db7e70b14b 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -49,7 +49,7 @@ def test_grouper_index_level_as_string(levels, key_strs, groupers): ["B", "outer", "inner"], ], ) -def test_grouper_index_level_as_string_series(series, levels): +def test_grouper_index_level_as_string_series(levels): # Compute expected result df = pd.DataFrame( { diff --git a/pandas/tests/indexes/datetimelike_/test_nat.py b/pandas/tests/indexes/datetimelike_/test_nat.py index 1a667536b9266..3dd0ce1cbd637 100644 --- a/pandas/tests/indexes/datetimelike_/test_nat.py +++ b/pandas/tests/indexes/datetimelike_/test_nat.py @@ -19,7 +19,7 @@ DatetimeIndex(["2011-01-01", "2011-01-02"], tz="UTC"), ], ) -def test_nat(self, index_without_na): +def test_nat(index_without_na): empty_index = index_without_na[:0] index_with_na = index_without_na.copy(deep=True) diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py index 619b207d402fc..f2458a6c6114d 100644 --- a/pandas/tests/indexes/numeric/test_indexing.py +++ b/pandas/tests/indexes/numeric/test_indexing.py @@ -296,7 +296,7 @@ def test_get_indexer_int64(self): expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) - def test_get_indexer_uint64(self, index_large): + def test_get_indexer_uint64(self): index_large = Index( [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25], dtype=np.uint64, diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 486d0a51a0866..3b47446d6da7f 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -32,7 +32,8 @@ def float_index(self, dtype): ], ids=["mixed", "float", "mixed_dec", "float_dec"], ) - def test_repr_roundtrip(self, index): + def test_repr_roundtrip(self, index_data, dtype): + index = Index(index_data, dtype=dtype) tm.assert_index_equal(eval(repr(index)), index, exact=True) def check_coerce(self, a, b, is_float_index=True): diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 4ef288d5fae27..7914d40ea8aaa 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -396,7 +396,7 @@ def test_non_ascii_key(self): result = json_normalize(json.loads(testjson)) tm.assert_frame_equal(result, expected) - def test_missing_field(self, author_missing_data): + def test_missing_field(self): # GH20030: author_missing_data = [ {"info": None}, diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index c0bc82195990a..10776fe5d050f 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -2,6 +2,7 @@ Testing that we work in the downstream packages """ import array +from functools import partial import subprocess import sys @@ -261,7 +262,9 @@ def __radd__(self, other): @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) -@pytest.mark.parametrize("box", [memoryview, array.array, "dask", "xarray"]) +@pytest.mark.parametrize( + "box", [memoryview, partial(array.array, "i"), "dask", "xarray"] +) def test_from_obscure_array(dtype, box): # GH#24539 recognize e.g xarray, dask, ... # Note: we dont do this for PeriodArray bc _from_sequence won't accept From d1d999d5238897371ab38256d050eb04814012de Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Dec 2023 17:46:19 -0800 Subject: [PATCH 08/12] fix typo --- pandas/tests/indexes/numeric/test_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 3b47446d6da7f..3ae746ac87179 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -23,7 +23,7 @@ def float_index(self, dtype): return Index([0.0, 2.5, 5.0, 7.5, 10.0], dtype=dtype) @pytest.mark.parametrize( - "index", + "index_data", [ [1.5, 2, 3, 4, 5], [0.0, 2.5, 5.0, 7.5, 10.0], From 587907d5ab187a3d3661afddc703e19eb2e28db5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Dec 2023 18:51:50 -0800 Subject: [PATCH 09/12] pylint --- pandas/tests/groupby/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index 4974e13a1b6ea..f839bf156ca00 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -150,7 +150,7 @@ def _make_df_from_data(data): df = _make_df_from_data(multiindex_data) result = df.groupby("Date", as_index=False).nth(slice(3, -3)) - sliced = {date: multiindex_data[date][3:-3] for date in multiindex_data} + sliced = {date: values[3:-3] for date, values in multiindex_data.items()} expected = _make_df_from_data(sliced) tm.assert_frame_equal(result, expected) From 6b2bca6b14b8233886190386d9a94fa24b30e245 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 28 Dec 2023 11:35:12 -0800 Subject: [PATCH 10/12] Uncomment --- pandas/tests/groupby/test_all_methods.py | 69 ++++++++++++------------ 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py index 9560310489a54..ad35bec70f668 100644 --- a/pandas/tests/groupby/test_all_methods.py +++ b/pandas/tests/groupby/test_all_methods.py @@ -18,44 +18,45 @@ import pandas._testing as tm from pandas.tests.groupby import get_groupby_method_args -# def test_multiindex_group_all_columns_when_empty(groupby_func): -# # GH 32464 -# df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) -# gb = df.groupby(["a", "b", "c"], group_keys=False) -# method = getattr(gb, groupby_func) -# args = get_groupby_method_args(groupby_func, df) -# warn = FutureWarning if groupby_func == "fillna" else None -# warn_msg = "DataFrameGroupBy.fillna is deprecated" -# with tm.assert_produces_warning(warn, match=warn_msg): -# result = method(*args).index -# expected = df.index -# tm.assert_index_equal(result, expected) +def test_multiindex_group_all_columns_when_empty(groupby_func): + # GH 32464 + df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) + gb = df.groupby(["a", "b", "c"], group_keys=False) + method = getattr(gb, groupby_func) + args = get_groupby_method_args(groupby_func, df) + + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + result = method(*args).index + expected = df.index + tm.assert_index_equal(result, expected) -# def test_duplicate_columns(request, groupby_func, as_index): -# # GH#50806 -# if groupby_func == "corrwith": -# msg = "GH#50845 - corrwith fails when there are duplicate columns" -# request.applymarker(pytest.mark.xfail(reason=msg)) -# df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb")) -# args = get_groupby_method_args(groupby_func, df) -# gb = df.groupby("a", as_index=as_index) -# warn = FutureWarning if groupby_func == "fillna" else None -# warn_msg = "DataFrameGroupBy.fillna is deprecated" -# with tm.assert_produces_warning(warn, match=warn_msg): -# result = getattr(gb, groupby_func)(*args) +def test_duplicate_columns(request, groupby_func, as_index): + # GH#50806 + if groupby_func == "corrwith": + msg = "GH#50845 - corrwith fails when there are duplicate columns" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb")) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index) + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + result = getattr(gb, groupby_func)(*args) -# expected_df = df.set_axis(["a", "b", "c"], axis=1) -# expected_args = get_groupby_method_args(groupby_func, expected_df) -# expected_gb = expected_df.groupby("a", as_index=as_index) -# warn = FutureWarning if groupby_func == "fillna" else None -# warn_msg = "DataFrameGroupBy.fillna is deprecated" -# with tm.assert_produces_warning(warn, match=warn_msg): -# expected = getattr(expected_gb, groupby_func)(*expected_args) -# if groupby_func not in ("size", "ngroup", "cumcount"): -# expected = expected.rename(columns={"c": "b"}) -# tm.assert_equal(result, expected) + expected_df = df.set_axis(["a", "b", "c"], axis=1) + expected_args = get_groupby_method_args(groupby_func, expected_df) + expected_gb = expected_df.groupby("a", as_index=as_index) + warn = FutureWarning if groupby_func == "fillna" else None + warn_msg = "DataFrameGroupBy.fillna is deprecated" + with tm.assert_produces_warning(warn, match=warn_msg): + expected = getattr(expected_gb, groupby_func)(*expected_args) + if groupby_func not in ("size", "ngroup", "cumcount"): + expected = expected.rename(columns={"c": "b"}) + tm.assert_equal(result, expected) @pytest.mark.parametrize( From 5c1d5d7c40c37b9b3b57765e065f32d2e3af9069 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Jan 2024 14:37:09 -0800 Subject: [PATCH 11/12] add back parametrization --- pandas/tests/indexes/interval/test_interval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index bce917b2fed62..7391d39bdde7b 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -860,6 +860,7 @@ def test_nbytes(self): expected = 64 # 4 * 8 * 2 assert result == expected + @pytest.mark.parametrize("name", [None, "foo"]) def test_set_closed(self, name, closed, other_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) From a3a9a9fd6990c7d6583ff4e769394bce7649e4c1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Jan 2024 15:49:16 -0800 Subject: [PATCH 12/12] Simplify --- pandas/tests/tseries/offsets/test_common.py | 25 +++++++++------------ 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pandas/tests/tseries/offsets/test_common.py b/pandas/tests/tseries/offsets/test_common.py index 51acd2f094684..3792878973c15 100644 --- a/pandas/tests/tseries/offsets/test_common.py +++ b/pandas/tests/tseries/offsets/test_common.py @@ -229,23 +229,20 @@ def test_sub(date, offset_box, offset2): @pytest.mark.parametrize( - "offset_box, offset1, dt", + "offset_box, offset1", [ - [BDay, BDay(), Timestamp(2008, 1, 1)], - [LastWeekOfMonth, LastWeekOfMonth(), Timestamp(2008, 1, 2)], - [WeekOfMonth, WeekOfMonth(), Timestamp(2008, 1, 2)], - [Week, Week(), Timestamp(2008, 1, 2)], - [SemiMonthBegin, SemiMonthBegin(), Timestamp(2008, 1, 2)], - [SemiMonthEnd, SemiMonthEnd(), Timestamp(2008, 1, 2)], - [ - CustomBusinessHour, - CustomBusinessHour(weekmask="Tue Wed Thu Fri"), - Timestamp(2014, 7, 1, 10, 00), - ], - [BusinessHour, BusinessHour(), Timestamp(2014, 7, 1, 10, 00)], + [BDay, BDay()], + [LastWeekOfMonth, LastWeekOfMonth()], + [WeekOfMonth, WeekOfMonth()], + [Week, Week()], + [SemiMonthBegin, SemiMonthBegin()], + [SemiMonthEnd, SemiMonthEnd()], + [CustomBusinessHour, CustomBusinessHour(weekmask="Tue Wed Thu Fri")], + [BusinessHour, BusinessHour()], ], ) -def test_Mult1(offset_box, offset1, dt): +def test_Mult1(offset_box, offset1): + dt = Timestamp(2008, 1, 2) assert dt + 10 * offset1 == dt + offset_box(10) assert dt + 5 * offset1 == dt + offset_box(5)