diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py index 3ef3f3ad4d3a2..49c6a91236db7 100644 --- a/pandas/tests/indexes/base_class/test_setops.py +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -149,13 +149,13 @@ def test_intersection_str_dates(self, sort): @pytest.mark.parametrize( "index2,expected_arr", - [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])], + [(["B", "D"], ["B"]), (["B", "D", "A"], ["A", "B"])], ) def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): # non-monotonic non-unique index1 = Index(["A", "B", "A", "C"]) expected = Index(expected_arr) - result = index1.intersection(index2, sort=sort) + result = index1.intersection(Index(index2), sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 019d434680661..024f37ee5b710 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -526,17 +526,13 @@ def test_range_tz_pytz(self): @pytest.mark.parametrize( "start, end", [ - [ - Timestamp(datetime(2014, 3, 6), tz="US/Eastern"), - Timestamp(datetime(2014, 3, 12), tz="US/Eastern"), - ], - [ - Timestamp(datetime(2013, 11, 1), tz="US/Eastern"), - Timestamp(datetime(2013, 11, 6), tz="US/Eastern"), - ], + [datetime(2014, 3, 6), datetime(2014, 3, 12)], + [datetime(2013, 11, 1), datetime(2013, 11, 6)], ], ) def test_range_tz_dst_straddle_pytz(self, start, end): + start = Timestamp(start, tz="US/Eastern") + end = Timestamp(end, tz="US/Eastern") dr = date_range(start, end, freq="D") assert dr[0] == start assert dr[-1] == end diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 7391d39bdde7b..006a06e529971 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ 
b/pandas/tests/indexes/interval/test_interval.py @@ -789,23 +789,20 @@ def test_is_overlapping(self, start, shift, na_value, closed): @pytest.mark.parametrize( "tuples", [ - list(zip(range(10), range(1, 11))), - list( - zip( - date_range("20170101", periods=10), - date_range("20170101", periods=10), - ) + zip(range(10), range(1, 11)), + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), ), - list( - zip( - timedelta_range("0 days", periods=10), - timedelta_range("1 day", periods=10), - ) + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), ), ], ) def test_to_tuples(self, tuples): # GH 18756 + tuples = list(tuples) idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 6c6d9022b1af3..1bbeedac3fb10 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -243,13 +243,14 @@ def f(a): @pytest.mark.parametrize( "keep, expected", [ - ("first", np.array([False, False, False, True, True, False])), - ("last", np.array([False, True, True, False, False, False])), - (False, np.array([False, True, True, True, True, False])), + ("first", [False, False, False, True, True, False]), + ("last", [False, True, True, False, False, False]), + (False, [False, True, True, True, True, False]), ], ) def test_duplicated(idx_dup, keep, expected): result = idx_dup.duplicated(keep=keep) + expected = np.array(expected) tm.assert_numpy_array_equal(result, expected) @@ -319,14 +320,7 @@ def test_duplicated_drop_duplicates(): tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) -@pytest.mark.parametrize( - "dtype", - [ - np.complex64, - np.complex128, - ], -) -def test_duplicated_series_complex_numbers(dtype): +def test_duplicated_series_complex_numbers(complex_dtype): # GH 17927 expected = Series( 
[False, False, False, True, False, False, False, True, False, True], @@ -345,7 +339,7 @@ def test_duplicated_series_complex_numbers(dtype): np.nan, np.nan + np.nan * 1j, ], - dtype=dtype, + dtype=complex_dtype, ).duplicated() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 5e2d3c23da645..f426a3ee42566 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -560,27 +560,26 @@ def test_getitem_group_select(idx): assert sorted_idx.get_loc("foo") == slice(0, 2) -@pytest.mark.parametrize("ind1", [[True] * 5, Index([True] * 5)]) -@pytest.mark.parametrize( - "ind2", - [[True, False, True, False, False], Index([True, False, True, False, False])], -) -def test_getitem_bool_index_all(ind1, ind2): +@pytest.mark.parametrize("box", [list, Index]) +def test_getitem_bool_index_all(box): # GH#22533 + ind1 = box([True] * 5) idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]) tm.assert_index_equal(idx[ind1], idx) + ind2 = box([True, False, True, False, False]) expected = MultiIndex.from_tuples([(10, 1), (30, 3)]) tm.assert_index_equal(idx[ind2], expected) -@pytest.mark.parametrize("ind1", [[True], Index([True])]) -@pytest.mark.parametrize("ind2", [[False], Index([False])]) -def test_getitem_bool_index_single(ind1, ind2): +@pytest.mark.parametrize("box", [list, Index]) +def test_getitem_bool_index_single(box): # GH#22533 + ind1 = box([True]) idx = MultiIndex.from_tuples([(10, 1)]) tm.assert_index_equal(idx[ind1], idx) + ind2 = box([False]) expected = MultiIndex( levels=[np.array([], dtype=np.int64), np.array([], dtype=np.int64)], codes=[[], []], diff --git a/pandas/tests/indexes/multi/test_isin.py b/pandas/tests/indexes/multi/test_isin.py index 68fdf25359f1b..92ac2468d5993 100644 --- a/pandas/tests/indexes/multi/test_isin.py +++ b/pandas/tests/indexes/multi/test_isin.py @@ -75,15 +75,16 @@ def 
test_isin_level_kwarg(): @pytest.mark.parametrize( "labels,expected,level", [ - ([("b", np.nan)], np.array([False, False, True]), None), - ([np.nan, "a"], np.array([True, True, False]), 0), - (["d", np.nan], np.array([False, True, True]), 1), + ([("b", np.nan)], [False, False, True], None), + ([np.nan, "a"], [True, True, False], 0), + (["d", np.nan], [False, True, True], 1), ], ) def test_isin_multi_index_with_missing_value(labels, expected, level): # GH 19132 midx = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]]) result = midx.isin(labels, level=level) + expected = np.array(expected) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index edd0feaaa1159..3fb428fecea41 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -12,10 +12,9 @@ import pandas._testing as tm -@pytest.mark.parametrize( - "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])] -) +@pytest.mark.parametrize("other", [["three", "one", "two"], ["one"], ["one", "three"]]) def test_join_level(idx, other, join_type): + other = Index(other) join_index, lidx, ridx = other.join( idx, how=join_type, level="second", return_indexers=True ) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 0abb56ecf9de7..9354984538c58 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -711,17 +711,11 @@ def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names): "a", [pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]], ) -@pytest.mark.parametrize( - "b", - [ - pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True), - pd.Categorical(["a", "b"], categories=["b", "a"]), - ], -) -def test_intersection_with_non_lex_sorted_categories(a, b): +@pytest.mark.parametrize("b_ordered", [True, False]) +def 
test_intersection_with_non_lex_sorted_categories(a, b_ordered): # GH#49974 other = ["1", "2"] - + b = pd.Categorical(["a", "b"], categories=["b", "a"], ordered=b_ordered) df1 = DataFrame({"x": a, "y": other}) df2 = DataFrame({"x": b, "y": other}) diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py index 29f8a0a5a5932..43adc09774914 100644 --- a/pandas/tests/indexes/numeric/test_indexing.py +++ b/pandas/tests/indexes/numeric/test_indexing.py @@ -110,16 +110,16 @@ def test_get_indexer(self): @pytest.mark.parametrize( "expected,method", [ - (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"), - (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"), - (np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"), - (np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"), + ([-1, 0, 0, 1, 1], "pad"), + ([-1, 0, 0, 1, 1], "ffill"), + ([0, 0, 1, 1, 2], "backfill"), + ([0, 0, 1, 1, 2], "bfill"), ], ) def test_get_indexer_methods(self, reverse, expected, method): index1 = Index([1, 2, 3, 4, 5]) index2 = Index([2, 4, 6]) - + expected = np.array(expected, dtype=np.intp) if reverse: index1 = index1[::-1] expected = expected[::-1] @@ -166,12 +166,11 @@ def test_get_indexer_nearest(self, method, tolerance, indexer, expected): @pytest.mark.parametrize("listtype", [list, tuple, Series, np.array]) @pytest.mark.parametrize( "tolerance, expected", - list( - zip( - [[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]], - [[0, 2, -1], [0, -1, -1], [-1, 2, 9]], - ) - ), + [ + [[0.3, 0.3, 0.1], [0, 2, -1]], + [[0.2, 0.1, 0.1], [0, -1, -1]], + [[0.1, 0.5, 0.5], [-1, 2, 9]], + ], ) def test_get_indexer_nearest_listlike_tolerance( self, tolerance, expected, listtype diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py index 102560852e8e4..e9e5a57dfe9e5 100644 --- a/pandas/tests/indexes/numeric/test_setops.py +++ b/pandas/tests/indexes/numeric/test_setops.py @@ -113,13 +113,14 @@ def 
test_intersection_uint64_outside_int64_range(self): tm.assert_index_equal(result, expected) @pytest.mark.parametrize( - "index2,keeps_name", + "index2_name,keeps_name", [ - (Index([4, 7, 6, 5, 3], name="index"), True), - (Index([4, 7, 6, 5, 3], name="other"), False), + ("index", True), + ("other", False), ], ) - def test_intersection_monotonic(self, index2, keeps_name, sort): + def test_intersection_monotonic(self, index2_name, keeps_name, sort): + index2 = Index([4, 7, 6, 5, 3], name=index2_name) index1 = Index([5, 3, 2, 4, 1], name="index") expected = Index([5, 3, 4]) diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index ebf9dac715f8d..039836da75cd5 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -18,11 +18,12 @@ class TestGetIndexer: @pytest.mark.parametrize( "method,expected", [ - ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)), - ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)), + ("pad", [-1, 0, 1, 1]), + ("backfill", [0, 0, 1, -1]), ], ) def test_get_indexer_strings(self, method, expected): + expected = np.array(expected, dtype=np.intp) index = Index(["b", "c"]) actual = index.get_indexer(["a", "b", "c", "d"], method=method) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 158cba9dfdded..77ce687d51693 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -844,12 +844,14 @@ def test_is_monotonic_incomparable(self, attr): @pytest.mark.parametrize( "index,expected", [ - (Index(["qux", "baz", "foo", "bar"]), np.array([False, False, True, True])), - (Index([]), np.array([], dtype=bool)), # empty + (["qux", "baz", "foo", "bar"], [False, False, True, True]), + ([], []), # empty ], ) def test_isin(self, values, index, expected): + index = Index(index) result = index.isin(values) + expected = np.array(expected, dtype=bool) tm.assert_numpy_array_equal(result, expected) 
def test_isin_nan_common_object( @@ -918,11 +920,12 @@ def test_isin_nan_common_float64(self, nulls_fixture, float_numpy_dtype): @pytest.mark.parametrize( "index", [ - Index(["qux", "baz", "foo", "bar"]), - Index([1.0, 2.0, 3.0, 4.0], dtype=np.float64), + ["qux", "baz", "foo", "bar"], + np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float64), ], ) def test_isin_level_kwarg(self, level, index): + index = Index(index) values = index.tolist()[-2:] + ["nonexisting"] expected = np.array([False, False, True, True]) @@ -1078,10 +1081,11 @@ def test_str_bool_series_indexing(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - "index,expected", [(Index(list("abcd")), True), (Index(range(4)), False)] + "index,expected", [(list("abcd"), True), (range(4), False)] ) def test_tab_completion(self, index, expected): # GH 9910 + index = Index(index) result = "str" in dir(index) assert result == expected @@ -1164,15 +1168,11 @@ def test_reindex_preserves_type_if_target_is_empty_list_or_array(self, labels): index = Index(list("abc")) assert index.reindex(labels)[0].dtype.type == index.dtype.type - @pytest.mark.parametrize( - "labels,dtype", - [ - (DatetimeIndex([]), np.datetime64), - ], - ) - def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self, labels, dtype): + def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): # GH7774 index = Index(list("abc")) + labels = DatetimeIndex([]) + dtype = np.datetime64 assert index.reindex(labels)[0].dtype.type == dtype def test_reindex_doesnt_preserve_type_if_target_is_empty_index_numeric( diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 72641077c90fe..867d32e5c86a2 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -80,14 +80,10 @@ def test_construction_list_tuples_nan(self, na_value, vtype): expected = MultiIndex.from_tuples(values) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( - 
"dtype", - [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"], - ) - def test_constructor_int_dtype_float(self, dtype): + def test_constructor_int_dtype_float(self, any_int_numpy_dtype): # GH#18400 - expected = Index([0, 1, 2, 3], dtype=dtype) - result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) + expected = Index([0, 1, 2, 3], dtype=any_int_numpy_dtype) + result = Index([0.0, 1.0, 2.0, 3.0], dtype=any_int_numpy_dtype) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("cast_index", [True, False]) @@ -332,11 +328,12 @@ def test_constructor_dtypes_to_categorical(self, vals): @pytest.mark.parametrize( "vals", [ - Index(np.array([np.datetime64("2011-01-01"), np.datetime64("2011-01-02")])), - Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]), + np.array([np.datetime64("2011-01-01"), np.datetime64("2011-01-02")]), + [datetime(2011, 1, 1), datetime(2011, 1, 2)], ], ) def test_constructor_dtypes_to_datetime(self, cast_index, vals): + vals = Index(vals) if cast_index: index = Index(vals, dtype=object) assert isinstance(index, Index) diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index 1ea47f636ac9b..e6716239cca5a 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -92,15 +92,16 @@ class TestContains: @pytest.mark.parametrize( "index,val", [ - (Index([0, 1, 2]), 2), - (Index([0, 1, "2"]), "2"), - (Index([0, 1, 2, np.inf, 4]), 4), - (Index([0, 1, 2, np.nan, 4]), 4), - (Index([0, 1, 2, np.inf]), np.inf), - (Index([0, 1, 2, np.nan]), np.nan), + ([0, 1, 2], 2), + ([0, 1, "2"], "2"), + ([0, 1, 2, np.inf, 4], 4), + ([0, 1, 2, np.nan, 4], 4), + ([0, 1, 2, np.inf], np.inf), + ([0, 1, 2, np.nan], np.nan), ], ) def test_index_contains(self, index, val): + index = Index(index) assert val in index @pytest.mark.parametrize( @@ -123,18 +124,16 @@ def test_index_contains(self, index, val): def test_index_not_contains(self, index, val): assert val not in 
index - @pytest.mark.parametrize( - "index,val", [(Index([0, 1, "2"]), 0), (Index([0, 1, "2"]), "2")] - ) - def test_mixed_index_contains(self, index, val): + @pytest.mark.parametrize("val", [0, "2"]) + def test_mixed_index_contains(self, val): # GH#19860 + index = Index([0, 1, "2"]) assert val in index - @pytest.mark.parametrize( - "index,val", [(Index([0, 1, "2"]), "1"), (Index([0, 1, "2"]), 2)] - ) + @pytest.mark.parametrize("val", ["1", 2]) def test_mixed_index_not_contains(self, index, val): # GH#19860 + index = Index([0, 1, "2"]) assert val not in index def test_contains_with_float_index(self, any_real_numpy_dtype): @@ -303,12 +302,10 @@ def test_putmask_with_wrong_mask(self, index): index.putmask("foo", fill) -@pytest.mark.parametrize( - "idx", [Index([1, 2, 3]), Index([0.1, 0.2, 0.3]), Index(["a", "b", "c"])] -) +@pytest.mark.parametrize("idx", [[1, 2, 3], [0.1, 0.2, 0.3], ["a", "b", "c"]]) def test_getitem_deprecated_float(idx): # https://github.com/pandas-dev/pandas/issues/34191 - + idx = Index(idx) msg = "Indexing with a float is no longer supported" with pytest.raises(IndexError, match=msg): idx[1.0] diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 8f4dd1c64236a..27b54ea66f0ac 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -721,14 +721,15 @@ def test_intersection(self, index, sort): assert inter is first @pytest.mark.parametrize( - "index2,keeps_name", + "index2_name,keeps_name", [ - (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name - (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names - (Index([3, 4, 5, 6, 7]), False), + ("index", True), # preserve same name + ("other", False), # drop diff names + (None, False), ], ) - def test_intersection_name_preservation(self, index2, keeps_name, sort): + def test_intersection_name_preservation(self, index2_name, keeps_name, sort): + index2 = Index([3, 4, 5, 6, 7], name=index2_name) index1 = 
Index([1, 2, 3, 4, 5], name="index") expected = Index([3, 4, 5]) result = index1.intersection(index2, sort) @@ -915,11 +916,13 @@ def test_symmetric_difference_mi(self, sort): @pytest.mark.parametrize( "index2,expected", [ - (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), - (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])), + ([0, 1, np.nan], [2.0, 3.0, 0.0]), + ([0, 1], [np.nan, 2.0, 3.0, 0.0]), ], ) def test_symmetric_difference_missing(self, index2, expected, sort): + index2 = Index(index2) + expected = Index(expected) # GH#13514 change: {nan} - {nan} == {} # (GH#6444, sorting of nans, is no longer an issue) index1 = Index([1, np.nan, 2, 3]) diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index cabfee9aa040a..b72ef57475305 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -211,10 +211,7 @@ def test_mi_intervalindex_slicing_with_scalar(self): tm.assert_series_equal(result, expected) @pytest.mark.xfail(not IS64, reason="GH 23440") - @pytest.mark.parametrize( - "base", - [101, 1010], - ) + @pytest.mark.parametrize("base", [101, 1010]) def test_reindex_behavior_with_interval_index(self, base): # GH 51826 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index c897afaeeee0e..a9aeba0c199f9 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1383,14 +1383,12 @@ def test_loc_getitem_timedelta_0seconds(self): result = df.loc["0s":, :] tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "val,expected", [(2**63 - 1, Series([1])), (2**63, Series([2]))] - ) + @pytest.mark.parametrize("val,expected", [(2**63 - 1, 1), (2**63, 2)]) def test_loc_getitem_uint64_scalar(self, val, expected): # see GH#19399 df = DataFrame([1, 2], index=[2**63 - 1, 2**63]) result = df.loc[val] - + expected = Series([expected]) expected.name = val tm.assert_series_equal(result, 
expected) @@ -2168,12 +2166,11 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index): ) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize( - "dtype", ["Int32", "Int64", "UInt32", "UInt64", "Float32", "Float64"] - ) - def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype): + def test_loc_setitem_with_expansion_preserves_nullable_int( + self, any_numeric_ea_dtype + ): # GH#42099 - ser = Series([0, 1, 2, 3], dtype=dtype) + ser = Series([0, 1, 2, 3], dtype=any_numeric_ea_dtype) df = DataFrame({"data": ser}) result = DataFrame(index=df.index) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 15712f36da4ca..04a25317c8017 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -562,25 +562,21 @@ def test_reader_dtype(self, read_ext): [ ( None, - DataFrame( - { - "a": [1, 2, 3, 4], - "b": [2.5, 3.5, 4.5, 5.5], - "c": [1, 2, 3, 4], - "d": [1.0, 2.0, np.nan, 4.0], - } - ), + { + "a": [1, 2, 3, 4], + "b": [2.5, 3.5, 4.5, 5.5], + "c": [1, 2, 3, 4], + "d": [1.0, 2.0, np.nan, 4.0], + }, ), ( {"a": "float64", "b": "float32", "c": str, "d": str}, - DataFrame( - { - "a": Series([1, 2, 3, 4], dtype="float64"), - "b": Series([2.5, 3.5, 4.5, 5.5], dtype="float32"), - "c": Series(["001", "002", "003", "004"], dtype=object), - "d": Series(["1", "2", np.nan, "4"], dtype=object), - } - ), + { + "a": Series([1, 2, 3, 4], dtype="float64"), + "b": Series([2.5, 3.5, 4.5, 5.5], dtype="float32"), + "c": Series(["001", "002", "003", "004"], dtype=object), + "d": Series(["1", "2", np.nan, "4"], dtype=object), + }, ), ], ) @@ -589,6 +585,7 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): basename = "testdtype" actual = pd.read_excel(basename + read_ext, dtype=dtype) + expected = DataFrame(expected) tm.assert_frame_equal(actual, expected) def test_dtype_backend(self, read_ext, dtype_backend, engine): diff --git a/pandas/tests/io/excel/test_writers.py 
b/pandas/tests/io/excel/test_writers.py index 8c003723c1c71..76a138a295bda 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -90,7 +90,7 @@ def set_engine(engine, ext): class TestRoundTrip: @pytest.mark.parametrize( "header,expected", - [(None, DataFrame([np.nan] * 4)), (0, DataFrame({"Unnamed: 0": [np.nan] * 3}))], + [(None, [np.nan] * 4), (0, {"Unnamed: 0": [np.nan] * 3})], ) def test_read_one_empty_col_no_header(self, ext, header, expected): # xref gh-12292 @@ -102,14 +102,14 @@ def test_read_one_empty_col_no_header(self, ext, header, expected): result = pd.read_excel( path, sheet_name=filename, usecols=[0], header=header ) - + expected = DataFrame(expected) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( - "header,expected", - [(None, DataFrame([0] + [np.nan] * 4)), (0, DataFrame([np.nan] * 4))], + "header,expected_extra", + [(None, [0]), (0, [])], ) - def test_read_one_empty_col_with_header(self, ext, header, expected): + def test_read_one_empty_col_with_header(self, ext, header, expected_extra): filename = "with_header" df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]]) @@ -118,7 +118,7 @@ def test_read_one_empty_col_with_header(self, ext, header, expected): result = pd.read_excel( path, sheet_name=filename, usecols=[0], header=header ) - + expected = DataFrame(expected_extra + [np.nan] * 4) tm.assert_frame_equal(result, expected) def test_set_column_names_in_parameter(self, ext): diff --git a/pandas/tests/io/formats/style/test_highlight.py b/pandas/tests/io/formats/style/test_highlight.py index 3d59719010ee0..5d19e9c14d534 100644 --- a/pandas/tests/io/formats/style/test_highlight.py +++ b/pandas/tests/io/formats/style/test_highlight.py @@ -198,16 +198,17 @@ def test_highlight_quantile(styler, kwargs): ], ) @pytest.mark.parametrize( - "df", + "dtype", [ - DataFrame([[0, 10], [20, 30]], dtype=int), - DataFrame([[0, 10], [20, 30]], dtype=float), - DataFrame([[0, 10], [20, 
30]], dtype="datetime64[ns]"), - DataFrame([[0, 10], [20, 30]], dtype=str), - DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"), + int, + float, + "datetime64[ns]", + str, + "timedelta64[ns]", ], ) -def test_all_highlight_dtypes(f, kwargs, df): +def test_all_highlight_dtypes(f, kwargs, dtype): + df = DataFrame([[0, 10], [20, 30]], dtype=dtype) if f == "highlight_quantile" and isinstance(df.iloc[0, 0], (str)): return None # quantile incompatible with str if f == "highlight_between": diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index fb7a77f1ddb27..ef7bfb11d81d8 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -260,15 +260,10 @@ def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gma assert expected.ctx == result.ctx -@pytest.mark.parametrize( - "gmap, axis", - [ - (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1), - (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0), - ], -) -def test_background_gradient_gmap_wrong_dataframe(styler_blank, gmap, axis): +@pytest.mark.parametrize("axis", [1, 0]) +def test_background_gradient_gmap_wrong_dataframe(styler_blank, axis): # test giving a gmap in DataFrame but with wrong axis + gmap = DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]) msg = "'gmap' is a DataFrame but underlying data for operations is a Series" with pytest.raises(ValueError, match=msg): styler_blank.background_gradient(gmap=gmap, axis=axis)._compute() @@ -321,10 +316,7 @@ def test_bar_color_raises(df): df.style.bar(color="something", cmap="something else").to_html() -@pytest.mark.parametrize( - "plot_method", - ["scatter", "hexbin"], -) +@pytest.mark.parametrize("plot_method", ["scatter", "hexbin"]) def test_pass_colormap_instance(df, plot_method): # https://github.com/pandas-dev/pandas/issues/49374 cmap = mpl.colors.ListedColormap([[1, 
1, 1], [0, 0, 0]]) diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py index 7f1443c3ee66b..eb221686dd165 100644 --- a/pandas/tests/io/formats/style/test_to_latex.py +++ b/pandas/tests/io/formats/style/test_to_latex.py @@ -1058,10 +1058,10 @@ def test_concat_chain(): @pytest.mark.parametrize( - "df, expected", + "columns, expected", [ ( - DataFrame(), + None, dedent( """\ \\begin{tabular}{l} @@ -1070,7 +1070,7 @@ def test_concat_chain(): ), ), ( - DataFrame(columns=["a", "b", "c"]), + ["a", "b", "c"], dedent( """\ \\begin{tabular}{llll} @@ -1084,7 +1084,8 @@ def test_concat_chain(): @pytest.mark.parametrize( "clines", [None, "all;data", "all;index", "skip-last;data", "skip-last;index"] ) -def test_empty_clines(df: DataFrame, expected: str, clines: str): +def test_empty_clines(columns, expected: str, clines: str): # GH 47203 + df = DataFrame(columns=columns) result = df.style.to_latex(clines=clines) assert result == expected diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 790ba92f70c40..e85b4cb29390e 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -136,13 +136,14 @@ def test_to_html_with_empty_string_label(): @pytest.mark.parametrize( - "df,expected", + "df_data,expected", [ - (DataFrame({"\u03c3": np.arange(10.0)}), "unicode_1"), - (DataFrame({"A": ["\u03c3"]}), "unicode_2"), + ({"\u03c3": np.arange(10.0)}, "unicode_1"), + ({"A": ["\u03c3"]}, "unicode_2"), ], ) -def test_to_html_unicode(df, expected, datapath): +def test_to_html_unicode(df_data, expected, datapath): + df = DataFrame(df_data) expected = expected_html(datapath, expected) result = df.to_html() assert result == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 4c8cd4b6a2b8e..304aff0002209 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ 
b/pandas/tests/io/formats/test_to_latex.py @@ -283,14 +283,15 @@ def test_to_latex_longtable_without_index(self): assert result == expected @pytest.mark.parametrize( - "df, expected_number", + "df_data, expected_number", [ - (DataFrame({"a": [1, 2]}), 1), - (DataFrame({"a": [1, 2], "b": [3, 4]}), 2), - (DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}), 3), + ({"a": [1, 2]}, 1), + ({"a": [1, 2], "b": [3, 4]}, 2), + ({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, 3), ], ) - def test_to_latex_longtable_continued_on_next_page(self, df, expected_number): + def test_to_latex_longtable_continued_on_next_page(self, df_data, expected_number): + df = DataFrame(df_data) result = df.to_latex(index=False, longtable=True) assert rf"\multicolumn{{{expected_number}}}" in result diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index d5ea470af79d6..28b613fa1f6f6 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -165,11 +165,9 @@ def test_as_json_table_type_bool_data(self, bool_type): def test_as_json_table_type_date_data(self, date_data): assert as_json_table_type(date_data.dtype) == "datetime" - @pytest.mark.parametrize( - "str_data", - [pd.Series(["a", "b"], dtype=object), pd.Index(["a", "b"], dtype=object)], - ) - def test_as_json_table_type_string_data(self, str_data): + @pytest.mark.parametrize("klass", [pd.Series, pd.Index]) + def test_as_json_table_type_string_data(self, klass): + str_data = klass(["a", "b"], dtype=object) assert as_json_table_type(str_data.dtype) == "string" @pytest.mark.parametrize( @@ -700,20 +698,27 @@ class TestTableOrientReader: }, ], ) - def test_read_json_table_orient(self, index_nm, vals, recwarn): + def test_read_json_table_orient(self, index_nm, vals): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) - out = df.to_json(orient="table") + out = StringIO(df.to_json(orient="table")) result = pd.read_json(out, 
orient="table") tm.assert_frame_equal(df, result) - @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) @pytest.mark.parametrize( - "vals", - [{"timedeltas": pd.timedelta_range("1h", periods=4, freq="min")}], + "index_nm", + [ + None, + "idx", + pytest.param( + "index", + marks=pytest.mark.filterwarnings("ignore:Index name:UserWarning"), + ), + ], ) - def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): + def test_read_json_table_orient_raises(self, index_nm): + vals = {"timedeltas": pd.timedelta_range("1h", periods=4, freq="min")} df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) - out = df.to_json(orient="table") + out = StringIO(df.to_json(orient="table")) with pytest.raises(NotImplementedError, match="can not yet read "): pd.read_json(out, orient="table") @@ -744,14 +749,14 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): }, ], ) - def test_read_json_table_period_orient(self, index_nm, vals, recwarn): + def test_read_json_table_period_orient(self, index_nm, vals): df = DataFrame( vals, index=pd.Index( (pd.Period(f"2022Q{q}") for q in range(1, 5)), name=index_nm ), ) - out = df.to_json(orient="table") + out = StringIO(df.to_json(orient="table")) result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) @@ -787,10 +792,10 @@ def test_read_json_table_period_orient(self, index_nm, vals, recwarn): }, ], ) - def test_read_json_table_timezones_orient(self, idx, vals, recwarn): + def test_read_json_table_timezones_orient(self, idx, vals): # GH 35973 df = DataFrame(vals, index=idx) - out = df.to_json(orient="table") + out = StringIO(df.to_json(orient="table")) result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) @@ -861,12 +866,12 @@ def test_read_json_orient_table_old_schema_version(self): tm.assert_frame_equal(expected, result) @pytest.mark.parametrize("freq", ["M", "2M", "Q", "2Q", "Y", "2Y"]) - def test_read_json_table_orient_period_depr_freq(self, freq, 
recwarn): + def test_read_json_table_orient_period_depr_freq(self, freq): # GH#9586 df = DataFrame( {"ints": [1, 2]}, index=pd.PeriodIndex(["2020-01", "2021-06"], freq=freq), ) - out = df.to_json(orient="table") + out = StringIO(df.to_json(orient="table")) result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index 015b27d0b3606..68c7a96920533 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -61,54 +61,33 @@ def test_build_table_schema(self): class TestTableSchemaType: - @pytest.mark.parametrize( - "date_data", - [ - DateArray([dt.date(2021, 10, 10)]), - DateArray(dt.date(2021, 10, 10)), - Series(DateArray(dt.date(2021, 10, 10))), - ], - ) - def test_as_json_table_type_ext_date_array_dtype(self, date_data): + @pytest.mark.parametrize("box", [lambda x: x, Series]) + def test_as_json_table_type_ext_date_array_dtype(self, box): + date_data = box(DateArray([dt.date(2021, 10, 10)])) assert as_json_table_type(date_data.dtype) == "any" def test_as_json_table_type_ext_date_dtype(self): assert as_json_table_type(DateDtype()) == "any" - @pytest.mark.parametrize( - "decimal_data", - [ - DecimalArray([decimal.Decimal(10)]), - Series(DecimalArray([decimal.Decimal(10)])), - ], - ) - def test_as_json_table_type_ext_decimal_array_dtype(self, decimal_data): + @pytest.mark.parametrize("box", [lambda x: x, Series]) + def test_as_json_table_type_ext_decimal_array_dtype(self, box): + decimal_data = box(DecimalArray([decimal.Decimal(10)])) assert as_json_table_type(decimal_data.dtype) == "number" def test_as_json_table_type_ext_decimal_dtype(self): assert as_json_table_type(DecimalDtype()) == "number" - @pytest.mark.parametrize( - "string_data", - [ - array(["pandas"], dtype="string"), - Series(array(["pandas"], dtype="string")), - ], - ) - def 
test_as_json_table_type_ext_string_array_dtype(self, string_data): + @pytest.mark.parametrize("box", [lambda x: x, Series]) + def test_as_json_table_type_ext_string_array_dtype(self, box): + string_data = box(array(["pandas"], dtype="string")) assert as_json_table_type(string_data.dtype) == "any" def test_as_json_table_type_ext_string_dtype(self): assert as_json_table_type(StringDtype()) == "any" - @pytest.mark.parametrize( - "integer_data", - [ - array([10], dtype="Int64"), - Series(array([10], dtype="Int64")), - ], - ) - def test_as_json_table_type_ext_integer_array_dtype(self, integer_data): + @pytest.mark.parametrize("box", [lambda x: x, Series]) + def test_as_json_table_type_ext_integer_array_dtype(self, box): + integer_data = box(array([10], dtype="Int64")) assert as_json_table_type(integer_data.dtype) == "integer" def test_as_json_table_type_ext_integer_dtype(self): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7254fd7cb345d..2a2b4053be565 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1895,17 +1895,12 @@ def test_frame_int_overflow(self): result = read_json(StringIO(encoded_json)) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "dataframe,expected", - [ - ( - DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}), - '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,' - '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}', - ) - ], - ) - def test_json_multiindex(self, dataframe, expected): + def test_json_multiindex(self): + dataframe = DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}) + expected = ( + '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,' + '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}' + ) series = dataframe.stack(future_stack=True) result = series.to_json(orient="index") assert result == expected diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index c7d2a5845b50e..ce7bb74240c53 100644 --- 
a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -177,10 +177,7 @@ def test_encode_dict_with_unicode_keys(self, unicode_key): unicode_dict = {unicode_key: "value1"} assert unicode_dict == ujson.ujson_loads(ujson.ujson_dumps(unicode_dict)) - @pytest.mark.parametrize( - "double_input", - [math.pi, -math.pi], # Should work with negatives too. - ) + @pytest.mark.parametrize("double_input", [math.pi, -math.pi]) def test_encode_double_conversion(self, double_input): output = ujson.ujson_dumps(double_input) assert round(double_input, 5) == round(json.loads(output), 5) @@ -520,10 +517,7 @@ def test_decode_invalid_dict(self, invalid_dict): with pytest.raises(ValueError, match=msg): ujson.ujson_loads(invalid_dict) - @pytest.mark.parametrize( - "numeric_int_as_str", - ["31337", "-31337"], # Should work with negatives. - ) + @pytest.mark.parametrize("numeric_int_as_str", ["31337", "-31337"]) def test_decode_numeric_int(self, numeric_int_as_str): assert int(numeric_int_as_str) == ujson.ujson_loads(numeric_int_as_str) diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 038c684c90c9e..7cdaac1a284cd 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -145,20 +145,21 @@ def test_multi_index_no_level_names_implicit(all_parsers): @xfail_pyarrow # TypeError: an integer is required @pytest.mark.parametrize( - "data,expected,header", + "data,columns,header", [ - ("a,b", DataFrame(columns=["a", "b"]), [0]), + ("a,b", ["a", "b"], [0]), ( "a,b\nc,d", - DataFrame(columns=MultiIndex.from_tuples([("a", "c"), ("b", "d")])), + MultiIndex.from_tuples([("a", "c"), ("b", "d")]), [0, 1], ), ], ) @pytest.mark.parametrize("round_trip", [True, False]) -def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip): +def test_multi_index_blank_df(all_parsers, data, columns, header, round_trip): # see gh-14545 parser = all_parsers + expected = 
DataFrame(columns=columns) data = expected.to_csv(index=False) if round_trip else data result = parser.read_csv(StringIO(data), header=header) diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index a3167346c64ef..e77958b0e9acc 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -40,31 +40,29 @@ def test_int_conversion(all_parsers): ( "A,B\nTrue,1\nFalse,2\nTrue,3", {}, - DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]), + [[True, 1], [False, 2], [True, 3]], ), ( "A,B\nYES,1\nno,2\nyes,3\nNo,3\nYes,3", {"true_values": ["yes", "Yes", "YES"], "false_values": ["no", "NO", "No"]}, - DataFrame( - [[True, 1], [False, 2], [True, 3], [False, 3], [True, 3]], - columns=["A", "B"], - ), + [[True, 1], [False, 2], [True, 3], [False, 3], [True, 3]], ), ( "A,B\nTRUE,1\nFALSE,2\nTRUE,3", {}, - DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]), + [[True, 1], [False, 2], [True, 3]], ), ( "A,B\nfoo,bar\nbar,foo", {"true_values": ["foo"], "false_values": ["bar"]}, - DataFrame([[True, False], [False, True]], columns=["A", "B"]), + [[True, False], [False, True]], ), ], ) def test_parse_bool(all_parsers, data, kwargs, expected): parser = all_parsers result = parser.read_csv(StringIO(data), **kwargs) + expected = DataFrame(expected, columns=["A", "B"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 155e52d76e895..f2d5c77121467 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -97,22 +97,22 @@ def test_unicode_encoding(all_parsers, csv_dir_path): "data,kwargs,expected", [ # Basic test - ("a\n1", {}, DataFrame({"a": [1]})), + ("a\n1", {}, [1]), # "Regular" quoting - ('"a"\n1', {"quotechar": '"'}, DataFrame({"a": [1]})), + ('"a"\n1', {"quotechar": '"'}, [1]), # Test in a data row instead of header - ("b\n1", 
{"names": ["a"]}, DataFrame({"a": ["b", "1"]})), + ("b\n1", {"names": ["a"]}, ["b", "1"]), # Test in empty data row with skipping - ("\n1", {"names": ["a"], "skip_blank_lines": True}, DataFrame({"a": [1]})), + ("\n1", {"names": ["a"], "skip_blank_lines": True}, [1]), # Test in empty data row without skipping ( "\n1", {"names": ["a"], "skip_blank_lines": False}, - DataFrame({"a": [np.nan, 1]}), + [np.nan, 1], ), ], ) -def test_utf8_bom(all_parsers, data, kwargs, expected, request): +def test_utf8_bom(all_parsers, data, kwargs, expected): # see gh-4793 parser = all_parsers bom = "\ufeff" @@ -131,6 +131,7 @@ def _encode_data_with_bom(_data): pytest.skip(reason="https://github.com/apache/arrow/issues/38676") result = parser.read_csv(_encode_data_with_bom(data), encoding=utf8, **kwargs) + expected = DataFrame({"a": expected}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index ca106fa772e82..6ebfc8f337c10 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -263,43 +263,35 @@ def test_na_value_dict_multi_index(all_parsers, index_col, expected): [ ( {}, - DataFrame( - { - "A": ["a", "b", np.nan, "d", "e", np.nan, "g"], - "B": [1, 2, 3, 4, 5, 6, 7], - "C": ["one", "two", "three", np.nan, "five", np.nan, "seven"], - } - ), + { + "A": ["a", "b", np.nan, "d", "e", np.nan, "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", np.nan, "five", np.nan, "seven"], + }, ), ( {"na_values": {"A": [], "C": []}, "keep_default_na": False}, - DataFrame( - { - "A": ["a", "b", "", "d", "e", "nan", "g"], - "B": [1, 2, 3, 4, 5, 6, 7], - "C": ["one", "two", "three", "nan", "five", "", "seven"], - } - ), + { + "A": ["a", "b", "", "d", "e", "nan", "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", "nan", "five", "", "seven"], + }, ), ( {"na_values": ["a"], "keep_default_na": False}, - DataFrame( - { - "A": [np.nan, "b", "", 
"d", "e", "nan", "g"], - "B": [1, 2, 3, 4, 5, 6, 7], - "C": ["one", "two", "three", "nan", "five", "", "seven"], - } - ), + { + "A": [np.nan, "b", "", "d", "e", "nan", "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", "nan", "five", "", "seven"], + }, ), ( {"na_values": {"A": [], "C": []}}, - DataFrame( - { - "A": ["a", "b", np.nan, "d", "e", np.nan, "g"], - "B": [1, 2, 3, 4, 5, 6, 7], - "C": ["one", "two", "three", np.nan, "five", np.nan, "seven"], - } - ), + { + "A": ["a", "b", np.nan, "d", "e", np.nan, "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", np.nan, "five", np.nan, "seven"], + }, ), ], ) @@ -325,6 +317,7 @@ def test_na_values_keep_default(all_parsers, kwargs, expected, request): request.applymarker(mark) result = parser.read_csv(StringIO(data), **kwargs) + expected = DataFrame(expected) tm.assert_frame_equal(result, expected) @@ -561,10 +554,10 @@ def test_na_values_dict_col_index(all_parsers): ( str(2**63) + "\n" + str(2**63 + 1), {"na_values": [2**63]}, - DataFrame([str(2**63), str(2**63 + 1)]), + [str(2**63), str(2**63 + 1)], ), - (str(2**63) + ",1" + "\n,2", {}, DataFrame([[str(2**63), 1], ["", 2]])), - (str(2**63) + "\n1", {"na_values": [2**63]}, DataFrame([np.nan, 1])), + (str(2**63) + ",1" + "\n,2", {}, [[str(2**63), 1], ["", 2]]), + (str(2**63) + "\n1", {"na_values": [2**63]}, [np.nan, 1]), ], ) def test_na_values_uint64(all_parsers, data, kwargs, expected, request): @@ -581,6 +574,7 @@ def test_na_values_uint64(all_parsers, data, kwargs, expected, request): request.applymarker(mark) result = parser.read_csv(StringIO(data), header=None, **kwargs) + expected = DataFrame(expected) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 700dcde336cd1..9640d0dfe343f 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1755,11 +1755,11 @@ def 
test_parse_date_column_with_empty_string(all_parsers): [ ( "a\n135217135789158401\n1352171357E+5", - DataFrame({"a": [135217135789158401, 135217135700000]}, dtype="float64"), + [135217135789158401, 135217135700000], ), ( "a\n99999999999\n123456789012345\n1234E+0", - DataFrame({"a": [99999999999, 123456789012345, 1234]}, dtype="float64"), + [99999999999, 123456789012345, 1234], ), ], ) @@ -1772,6 +1772,7 @@ def test_parse_date_float(all_parsers, data, expected, parse_dates): parser = all_parsers result = parser.read_csv(StringIO(data), parse_dates=parse_dates) + expected = DataFrame({"a": expected}, dtype="float64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index dbd474c6ae0b9..dc3c527e82202 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -528,21 +528,17 @@ def test_no_thousand_convert_with_dot_for_non_numeric_cols(python_parser_only, d [ ( {"a": str, "b": np.float64, "c": np.int64}, - DataFrame( - { - "b": [16000.1, 0, 23000], - "c": [0, 4001, 131], - } - ), + { + "b": [16000.1, 0, 23000], + "c": [0, 4001, 131], + }, ), ( str, - DataFrame( - { - "b": ["16,000.1", "0", "23,000"], - "c": ["0", "4,001", "131"], - } - ), + { + "b": ["16,000.1", "0", "23,000"], + "c": ["0", "4,001", "131"], + }, ), ], ) @@ -560,5 +556,6 @@ def test_no_thousand_convert_for_non_numeric_cols(python_parser_only, dtype, exp dtype=dtype, thousands=",", ) + expected = DataFrame(expected) expected.insert(0, "a", ["0000,7995", "3,03,001,00514", "4923,600,041"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 0ca47ded7ba8a..3cd2351f84c7a 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -246,8 +246,8 @@ def test_skiprows_infield_quote(all_parsers): 
@pytest.mark.parametrize( "kwargs,expected", [ - ({}, DataFrame({"1": [3, 5]})), - ({"header": 0, "names": ["foo"]}, DataFrame({"foo": [3, 5]})), + ({}, "1"), + ({"header": 0, "names": ["foo"]}, "foo"), ], ) def test_skip_rows_callable(all_parsers, kwargs, expected): @@ -255,6 +255,7 @@ def test_skip_rows_callable(all_parsers, kwargs, expected): data = "a\n1\n2\n3\n4\n5" result = parser.read_csv(StringIO(data), skiprows=lambda x: x % 2 == 0, **kwargs) + expected = DataFrame({expected: [3, 5]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index 767fba666e417..214070b1ac5f2 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -389,20 +389,18 @@ def test_incomplete_first_row(all_parsers, usecols): "19,29,39\n" * 2 + "10,20,30,40", [0, 1, 2], {"header": None}, - DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]), + [[19, 29, 39], [19, 29, 39], [10, 20, 30]], ), # see gh-9549 ( ("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"), ["A", "B", "C"], {}, - DataFrame( - { - "A": [1, 3, 1, 1, 1, 5], - "B": [2, 4, 2, 2, 2, 6], - "C": [3, 5, 4, 3, 3, 7], - } - ), + { + "A": [1, 3, 1, 1, 1, 5], + "B": [2, 4, 2, 2, 2, 6], + "C": [3, 5, 4, 3, 3, 7], + }, ), ], ) @@ -410,6 +408,7 @@ def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected): # see gh-8985 parser = all_parsers result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs) + expected = DataFrame(expected) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index 58ebdfe7696b4..2ab9f1ac8be1c 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -190,25 +190,19 @@ def test_categorical_nan_only_columns(tmp_path, setup_path): 
tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "where, df, expected", - [ - ('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})), - ('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})), - ], -) -def test_convert_value( - tmp_path, setup_path, where: str, df: DataFrame, expected: DataFrame -): +@pytest.mark.parametrize("where, expected", [["q", []], ["a", ["a"]]]) +def test_convert_value(tmp_path, setup_path, where: str, expected): # GH39420 # Check that read_hdf with categorical columns can filter by where condition. + df = DataFrame({"col": ["a", "b", "s"]}) df.col = df.col.astype("category") max_widths = {"col": 1} categorical_values = sorted(df.col.unique()) + expected = DataFrame({"col": expected}) expected.col = expected.col.astype("category") expected.col = expected.col.cat.set_categories(categorical_values) path = tmp_path / setup_path df.to_hdf(path, key="df", format="table", min_itemsize=max_widths) - result = read_hdf(path, where=where) + result = read_hdf(path, where=f'col=="{where}"') tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 6a2d460232165..420d82c3af7e3 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1334,8 +1334,8 @@ def test_to_html_borderless(self): @pytest.mark.parametrize( "displayed_only,exp0,exp1", [ - (True, DataFrame(["foo"]), None), - (False, DataFrame(["foo bar baz qux"]), DataFrame(["foo"])), + (True, ["foo"], None), + (False, ["foo bar baz qux"], DataFrame(["foo"])), ], ) def test_displayed_only(self, displayed_only, exp0, exp1, flavor_read_html): @@ -1360,6 +1360,7 @@ def test_displayed_only(self, displayed_only, exp0, exp1, flavor_read_html): """ + exp0 = DataFrame(exp0) dfs = flavor_read_html(StringIO(data), displayed_only=displayed_only) tm.assert_frame_equal(dfs[0], exp0) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 
a6967732cf702..83a962ec26a7e 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -827,13 +827,7 @@ def test_s3_roundtrip(self, df_compat, s3_public_bucket, pa, s3so): ) @pytest.mark.single_cpu - @pytest.mark.parametrize( - "partition_col", - [ - ["A"], - [], - ], - ) + @pytest.mark.parametrize("partition_col", [["A"], []]) def test_s3_roundtrip_for_dir( self, df_compat, s3_public_bucket, pa, partition_col, s3so ): diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 37251a58b0c11..a123f6dd52c08 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -298,10 +298,8 @@ def test_index_false_rename_row_root(xml_books, parser): assert output == expected -@pytest.mark.parametrize( - "offset_index", [list(range(10, 13)), [str(i) for i in range(10, 13)]] -) -def test_index_false_with_offset_input_index(parser, offset_index, geom_df): +@pytest.mark.parametrize("typ", [int, str]) +def test_index_false_with_offset_input_index(parser, typ, geom_df): """ Tests that the output does not contain the `<index>` field when the index of the input Dataframe has an offset. @@ -328,7 +326,7 @@ def test_index_false_with_offset_input_index(parser, offset_index, geom_df): 3.0 """ - + offset_index = [typ(i) for i in range(10, 13)] offset_geom_df = geom_df.copy() offset_geom_df.index = Index(offset_index) output = offset_geom_df.to_xml(index=False, parser=parser)