From d8a92ee37beb45daeded08f2dc31e59fe47b8790 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:42:30 -0800 Subject: [PATCH 1/6] Remove makeObjectSeries --- pandas/_testing/__init__.py | 7 ------- pandas/conftest.py | 6 +++--- pandas/tests/arithmetic/test_object.py | 3 +-- pandas/tests/dtypes/test_missing.py | 8 ++++++-- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index b1918e1b1d7c2..af97e6c189b64 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -356,12 +356,6 @@ def makeDateIndex( return DatetimeIndex(dr, name=name, **kwargs) -def makeObjectSeries(name=None) -> Series: - data = [f"foo_{i}" for i in range(_N)] - index = Index([f"bar_{i}" for i in range(_N)]) - return Series(data, index=index, name=name, dtype=object) - - def getSeriesData() -> dict[str, Series]: index = Index([f"foo_{i}" for i in range(_N)]) return { @@ -934,7 +928,6 @@ def shares_memory(left, right) -> bool: "makeCustomIndex", "makeDataFrame", "makeDateIndex", - "makeObjectSeries", "makeTimeDataFrame", "makeTimeSeries", "maybe_produces_warning", diff --git a/pandas/conftest.py b/pandas/conftest.py index 1bc067eb32aef..fb8e9119d1f42 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -751,9 +751,9 @@ def object_series() -> Series: """ Fixture for Series of dtype object with Index of unique strings """ - s = tm.makeObjectSeries() - s.name = "objects" - return s + data = [f"foo_{i}" for i in range(30)] + index = Index([f"bar_{i}" for i in range(30)], dtype=object) + return Series(data, index=index, name="objects", dtype=object) @pytest.fixture diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 6b36f447eb7d5..7d27f940daa4c 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -169,8 +169,7 @@ def test_objarr_add_invalid(self, op, box_with_array): # invalid ops box = box_with_array - obj_ser = tm.makeObjectSeries() - obj_ser.name = "objects" + obj_ser = Series(list("abc"), dtype=object, name="objects") obj_ser = tm.box_expected(obj_ser, box) msg = "|".join( diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 88cec50c08aba..e1f8d8eca2537 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -78,8 +78,12 @@ def test_notna_notnull(notna_f): @pytest.mark.parametrize( "ser", [ - tm.makeObjectSeries(), - tm.makeTimeSeries(), + Series( + [str(i) for i in range(5)], + index=Index([str(i) for i in range(5)], dtype=object), + dtype=object, + ), + Series(range(5), date_range("2020-01-01", periods=5)), Series(range(5), period_range("2020-01-01", periods=5)), ], ) From 3b2d3e77a13b3f546e83597ede5b9b88909a27a9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:01:24 -0800 Subject: [PATCH 2/6] Remove makeDatetimeIndex --- pandas/_testing/__init__.py | 14 +------------- pandas/conftest.py | 9 +++++---- pandas/tests/arithmetic/test_datetime64.py | 2 +- .../tests/indexes/datetimes/methods/test_asof.py | 3 +-- .../indexes/datetimes/methods/test_isocalendar.py | 3 ++- .../tests/indexes/datetimes/test_scalar_compat.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 6 +++--- pandas/tests/indexes/test_base.py | 8 +++++--- pandas/tests/io/pytables/test_time_series.py | 5 +++-- pandas/tests/plotting/test_converter.py | 2 +- pandas/tests/plotting/test_series.py | 2 +- pandas/tests/series/indexing/test_get.py | 6 +++++- pandas/tests/series/methods/test_fillna.py | 12 +++++++++--- pandas/tests/series/methods/test_replace.py | 8 ++++---- pandas/tests/series/test_constructors.py | 2 +- 15 files changed, 43 insertions(+), 41 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index af97e6c189b64..79d8ac31cc6f8 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -2,7 +2,6 @@ import collections from collections import Counter -from datetime import datetime from decimal import Decimal import operator import os @@ -36,12 +35,10 @@ ArrowDtype, Categorical, DataFrame, - DatetimeIndex, Index, MultiIndex, RangeIndex, Series, - bdate_range, date_range, period_range, timedelta_range, @@ -348,14 +345,6 @@ def getCols(k) -> str: return string.ascii_uppercase[:k] -def makeDateIndex( - k: int = 10, freq: Frequency = "B", name=None, **kwargs -) -> DatetimeIndex: - dt = datetime(2000, 1, 1) - dr = bdate_range(dt, periods=k, freq=freq, name=name) - return DatetimeIndex(dr, name=name, **kwargs) - - def getSeriesData() -> dict[str, Series]: index = Index([f"foo_{i}" for i in range(_N)]) return { @@ -369,7 +358,7 @@ def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series: nper = _N return Series( np.random.default_rng(2).standard_normal(nper), - index=makeDateIndex(nper, freq=freq), + index=date_range("2020-01-01", periods=nper, freq=freq), name=name, ) @@ -927,7 +916,6 @@ def shares_memory(left, right) -> bool: "makeCustomDataframe", "makeCustomIndex", "makeDataFrame", - "makeDateIndex", "makeTimeDataFrame", "makeTimeSeries", "maybe_produces_warning", diff --git a/pandas/conftest.py b/pandas/conftest.py index fb8e9119d1f42..7c2bbc6e0b723 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -68,6 +68,7 @@ Series, Timedelta, Timestamp, + date_range, period_range, timedelta_range, ) @@ -608,15 +609,15 @@ def _create_mi_with_dt64tz_level(): """ # GH#8367 round trip with pickle return MultiIndex.from_product( - [[1, 2], ["a", "b"], pd.date_range("20130101", periods=3, tz="US/Eastern")], + [[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")], names=["one", "two", "three"], ) indices_dict = { "string": Index([f"pandas_{i}" for i in range(100)]), - "datetime": tm.makeDateIndex(100), - "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), + "datetime": date_range("2020-01-01", periods=100), + "datetime-tz": date_range("2020-01-01", periods=100, tz="US/Pacific"), "period": period_range("2020-01-01", periods=100, freq="D"), "timedelta": timedelta_range(start="1 day", periods=100, freq="D"), "range": RangeIndex(100), @@ -631,7 +632,7 @@ def _create_mi_with_dt64tz_level(): "float32": Index(np.arange(100), dtype="float32"), "float64": Index(np.arange(100), dtype="float64"), "bool-object": Index([True, False] * 5, dtype=object), - "bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0), + "bool-dtype": Index([True, False] * 5, dtype=bool), "complex64": Index( np.arange(100, dtype="complex64") + 1.0j * np.arange(100, dtype="complex64") ), diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 9014ba4b6093e..4bd0e6c1c3694 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -394,7 +394,7 @@ def test_dt64_compare_datetime_scalar(self, datetimelike, op, expected): class TestDatetimeIndexComparisons: # TODO: moved from tests.indexes.test_base; parametrize and de-duplicate def test_comparators(self, comparison_op): - index = tm.makeDateIndex(100) + index = date_range("2020-01-01", periods=10) element = index[len(index) // 2] element = Timestamp(element).to_datetime64() diff --git a/pandas/tests/indexes/datetimes/methods/test_asof.py b/pandas/tests/indexes/datetimes/methods/test_asof.py index f52b6da5b2f07..dc92f533087bc 100644 --- a/pandas/tests/indexes/datetimes/methods/test_asof.py +++ b/pandas/tests/indexes/datetimes/methods/test_asof.py @@ -6,7 +6,6 @@ date_range, isna, ) -import pandas._testing as tm class TestAsOf: @@ -18,7 +17,7 @@ def test_asof_partial(self): assert not isinstance(result, Index) def test_asof(self): - index = tm.makeDateIndex(100) + index = date_range("2020-01-01", periods=10) dt = index[0] assert index.asof(dt) == dt diff --git a/pandas/tests/indexes/datetimes/methods/test_isocalendar.py b/pandas/tests/indexes/datetimes/methods/test_isocalendar.py index 3f5a18675735a..97f1003e0f43f 100644 --- a/pandas/tests/indexes/datetimes/methods/test_isocalendar.py +++ b/pandas/tests/indexes/datetimes/methods/test_isocalendar.py @@ -1,6 +1,7 @@ from pandas import ( DataFrame, DatetimeIndex, + date_range, ) import pandas._testing as tm @@ -21,7 +22,7 @@ def test_isocalendar_returns_correct_values_close_to_new_year_with_tz(): def test_dti_timestamp_isocalendar_fields(): - idx = tm.makeDateIndex(100) + idx = date_range("2020-01-01", periods=10) expected = tuple(idx.isocalendar().iloc[-1].to_list()) result = idx[-1].isocalendar() assert result == expected diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 81992219d71b4..e93fc0e2a4e2e 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -106,7 +106,7 @@ def test_dti_timetz(self, tz_naive_fixture): ) def test_dti_timestamp_fields(self, field): # extra fields from DatetimeIndex like quarter and week - idx = tm.makeDateIndex(100) + idx = date_range("2020-01-01", periods=10) expected = getattr(idx, field)[-1] result = getattr(Timestamp(idx[-1]), field) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 993f88db38ea6..3ed7fcc027a06 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -42,7 +42,7 @@ class TestDatetimeIndexSetOps: # TODO: moved from test_datetimelike; dedup with version below def test_union2(self, sort): - everything = tm.makeDateIndex(10) + everything = date_range("2020-01-01", periods=10) first = everything[:5] second = everything[5:] union = first.union(second, sort=sort) @@ -50,7 +50,7 @@ def test_union2(self, sort): @pytest.mark.parametrize("box", [np.array, Series, list]) def test_union3(self, sort, box): - everything = tm.makeDateIndex(10) + everything = date_range("2020-01-01", periods=10) first = everything[:5] second = everything[5:] @@ -203,7 +203,7 @@ def test_union_same_timezone_different_units(self): # TODO: moved from test_datetimelike; de-duplicate with version below def test_intersection2(self): - first = tm.makeDateIndex(10) + first = date_range("2020-01-01", periods=10) second = first[5:] intersect = first.intersection(second) tm.assert_index_equal(intersect, second) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 3db81c0285bd2..bb8822f047330 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -540,7 +540,9 @@ def test_map_tseries_indices_return_index(self, index): tm.assert_index_equal(expected, result) def test_map_tseries_indices_accsr_return_index(self): - date_index = tm.makeDateIndex(24, freq="h", name="hourly") + date_index = DatetimeIndex( + date_range("2020-01-01", periods=24, freq="h"), name="hourly" + ) result = date_index.map(lambda x: x.hour) expected = Index(np.arange(24, dtype="int64"), name="hourly") tm.assert_index_equal(result, expected, exact=True) @@ -1001,7 +1003,7 @@ def test_str_attribute(self, method): "index", [ Index(range(5)), - tm.makeDateIndex(10), + date_range("2020-01-01", periods=10), MultiIndex.from_tuples([("foo", "1"), ("bar", "3")]), period_range(start="2000", end="2010", freq="Y"), ], @@ -1065,7 +1067,7 @@ def test_indexing_doesnt_change_class(self): def test_outer_join_sort(self): left_index = Index(np.random.default_rng(2).permutation(15)) - right_index = tm.makeDateIndex(10) + right_index = date_range("2020-01-01", periods=10) with tm.assert_produces_warning(RuntimeWarning): result = left_index.join(right_index, how="outer") diff --git a/pandas/tests/io/pytables/test_time_series.py b/pandas/tests/io/pytables/test_time_series.py index 4afcf5600dce6..726dd0d420347 100644 --- a/pandas/tests/io/pytables/test_time_series.py +++ b/pandas/tests/io/pytables/test_time_series.py @@ -8,6 +8,7 @@ DatetimeIndex, Series, _testing as tm, + date_range, period_range, ) from pandas.tests.io.pytables.common import ensure_clean_store @@ -28,7 +29,7 @@ def test_store_datetime_fractional_secs(setup_path, unit): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_tseries_indices_series(setup_path): with ensure_clean_store(setup_path) as store: - idx = tm.makeDateIndex(10) + idx = date_range("2020-01-01", periods=10) ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) store["a"] = ser result = store["a"] @@ -50,7 +51,7 @@ def test_tseries_indices_series(setup_path): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_tseries_indices_frame(setup_path): with ensure_clean_store(setup_path) as store: - idx = tm.makeDateIndex(10) + idx = date_range("2020-01-01", periods=10) df = DataFrame( np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx ) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 509e0ea5c482e..f748d7c5fc758 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -260,7 +260,7 @@ def test_time_formatter(self, time, format_expected): @pytest.mark.parametrize("freq", ("B", "ms", "s")) def test_dateindex_conversion(self, freq, dtc): rtol = 10**-9 - dateindex = tm.makeDateIndex(k=10, freq=freq) + dateindex = date_range("2020-01-01", periods=10, freq=freq) rs = dtc.convert(dateindex, None, None) xp = converter.mdates.date2num(dateindex._mpl_repr()) tm.assert_almost_equal(rs, xp, rtol=rtol) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index c8b47666e1b4a..9bf76637e1d71 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -237,7 +237,7 @@ def test_boolean(self): with pytest.raises(TypeError, match=msg): _check_plot_works(s.plot) - @pytest.mark.parametrize("index", [None, tm.makeDateIndex(k=4)]) + @pytest.mark.parametrize("index", [None, date_range("2020-01-01", periods=4)]) def test_line_area_nan_series(self, index): values = [1, 2, np.nan, 3] d = Series(values, index=index) diff --git a/pandas/tests/series/indexing/test_get.py b/pandas/tests/series/indexing/test_get.py index 61007c08b50e0..1f3711ad91903 100644 --- a/pandas/tests/series/indexing/test_get.py +++ b/pandas/tests/series/indexing/test_get.py @@ -3,8 +3,10 @@ import pandas as pd from pandas import ( + DatetimeIndex, Index, Series, + date_range, ) import pandas._testing as tm @@ -168,7 +170,9 @@ def test_get_with_default(): "arr", [ np.random.default_rng(2).standard_normal(10), - tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"), + DatetimeIndex(date_range("2020-01-01", periods=10), name="a").tz_localize( + tz="US/Eastern" + ), ], ) def test_get_with_ea(arr): diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 5d0ef893d5723..a5170898b1720 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -71,7 +71,9 @@ def test_fillna_value_or_method(self, datetime_series): datetime_series.fillna(value=0, method="ffill") def test_fillna(self): - ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + ts = Series( + [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5) + ) tm.assert_series_equal(ts, ts.fillna(method="ffill")) @@ -880,7 +882,9 @@ def test_fillna_bug(self): tm.assert_series_equal(filled, expected) def test_ffill(self): - ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + ts = Series( + [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5) + ) ts.iloc[2] = np.nan tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill")) @@ -891,7 +895,9 @@ def test_ffill_mixed_dtypes_without_missing_data(self): tm.assert_series_equal(series, result) def test_bfill(self): - ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + ts = Series( + [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5) + ) ts.iloc[2] = np.nan tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill")) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index fe0f79b766f72..477f36bdf4214 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -52,7 +52,7 @@ def test_replace_noop_doesnt_downcast(self): assert res.dtype == object def test_replace(self): - N = 100 + N = 50 ser = pd.Series(np.random.default_rng(2).standard_normal(N)) ser[0:4] = np.nan ser[6:10] = 0 @@ -70,7 +70,7 @@ def test_replace(self): ser = pd.Series( np.fabs(np.random.default_rng(2).standard_normal(N)), - tm.makeDateIndex(N), + pd.date_range("2020-01-01", periods=N), dtype=object, ) ser[:5] = np.nan @@ -290,10 +290,10 @@ def test_replace_Int_with_na(self, any_int_ea_dtype): tm.assert_series_equal(result, expected) def test_replace2(self): - N = 100 + N = 50 ser = pd.Series( np.fabs(np.random.default_rng(2).standard_normal(N)), - tm.makeDateIndex(N), + pd.date_range("2020-01-01", periods=N), dtype=object, ) ser[:5] = np.nan diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index e08f8d0c15f39..773d7e174feac 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2147,7 +2147,7 @@ def test_series_string_inference_na_first(self): class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self): - idx = tm.makeDateIndex(10000) + idx = date_range("2020-01-01", periods=5) ser = Series( np.random.default_rng(2).standard_normal(len(idx)), idx.astype(object) ) From 2bbd6a60ca8606857f7df250d92eeed94221355f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:02:50 -0800 Subject: [PATCH 3/6] Remove makeDataFrame --- pandas/_testing/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 79d8ac31cc6f8..57adf35f9ad89 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -373,11 +373,6 @@ def makeTimeDataFrame(nper=None, freq: Frequency = "B") -> DataFrame: return DataFrame(data) -def makeDataFrame() -> DataFrame: - data = getSeriesData() - return DataFrame(data) - - def makeCustomIndex( nentries, nlevels, @@ -915,7 +910,6 @@ def shares_memory(left, right) -> bool: "loc", "makeCustomDataframe", "makeCustomIndex", - "makeDataFrame", "makeTimeDataFrame", "makeTimeSeries", "maybe_produces_warning", From ed89785485498d129cc1df63e713a88cc2d7b346 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:45:50 -0800 Subject: [PATCH 4/6] Remove getSeriesData --- pandas/_testing/__init__.py | 9 -- pandas/conftest.py | 52 +++-------- pandas/tests/frame/conftest.py | 93 +++++-------------- pandas/tests/frame/indexing/test_xs.py | 2 +- .../frame/methods/test_first_and_last.py | 4 +- pandas/tests/frame/methods/test_info.py | 4 +- pandas/tests/frame/methods/test_truncate.py | 2 +- pandas/tests/frame/test_reductions.py | 59 +++--------- pandas/tests/io/json/test_pandas.py | 17 ++-- 9 files changed, 62 insertions(+), 180 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 57adf35f9ad89..6000c828555c6 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -345,14 +345,6 @@ def getCols(k) -> str: return string.ascii_uppercase[:k] -def getSeriesData() -> dict[str, Series]: - index = Index([f"foo_{i}" for i in range(_N)]) - return { - c: Series(np.random.default_rng(i).standard_normal(_N), index=index) - for i, c in enumerate(getCols(_K)) - } - - def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series: if nper is None: nper = _N @@ -903,7 +895,6 @@ def shares_memory(left, right) -> bool: "get_finest_unit", "get_obj", "get_op_from_name", - "getSeriesData", "getTimeSeriesData", "iat", "iloc", diff --git a/pandas/conftest.py b/pandas/conftest.py index 7c2bbc6e0b723..9ed6f8f43ae03 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -840,27 +840,12 @@ def int_frame() -> DataFrame: Fixture for DataFrame of ints with index of unique strings Columns are ['A', 'B', 'C', 'D'] - - A B C D - vpBeWjM651 1 0 1 0 - 5JyxmrP1En -1 0 0 0 - qEDaoD49U2 -1 1 0 0 - m66TkTfsFe 0 0 0 0 - EHPaNzEUFm -1 0 -1 0 - fpRJCevQhi 2 0 0 0 - OlQvnmfi3Q 0 0 -2 0 - ... .. .. .. .. - uB1FPlz4uP 0 0 0 1 - EcSe6yNzCU 0 0 -1 0 - L50VudaiI8 -1 1 -2 0 - y3bpw4nwIp 0 -1 0 0 - H0RdLLwrCT 1 1 0 0 - rY82K0vMwm 0 0 0 0 - 1OPIUjnkjk 2 0 0 0 - - [30 rows x 4 columns] """ - return DataFrame(tm.getSeriesData()).astype("int64") + return DataFrame( + np.ones((30, 4), dtype=np.int64), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) @pytest.fixture @@ -869,27 +854,12 @@ def float_frame() -> DataFrame: Fixture for DataFrame of floats with index of unique strings Columns are ['A', 'B', 'C', 'D']. - - A B C D - P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 - qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 - tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 - wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 - M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 - QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 - r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 - ... ... ... ... ... - IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 - lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 - qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 - yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 - 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 - eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 - xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 - - [30 rows x 4 columns] - """ - return DataFrame(tm.getSeriesData()) + """ + return DataFrame( + np.random.default_rng(2).standard_normal((30, 4)), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) @pytest.fixture diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index f7ed5180b46d9..99ea565e5b60c 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -3,6 +3,7 @@ from pandas import ( DataFrame, + Index, NaT, date_range, ) @@ -44,27 +45,12 @@ def float_string_frame(): Fixture for DataFrame of floats and strings with index of unique strings Columns are ['A', 'B', 'C', 'D', 'foo']. - - A B C D foo - w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar - PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar - ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar - 3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar - khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar - LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar - HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar - ... ... ... ... ... ... - 9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar - h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar - mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar - oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar - 9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar - jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar - lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar - - [30 rows x 5 columns] """ - df = DataFrame(tm.getSeriesData()) + df = DataFrame( + np.random.default_rng(2).standard_normal((30, 4)), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) df["foo"] = "bar" return df @@ -75,31 +61,18 @@ def mixed_float_frame(): Fixture for DataFrame of different float types with index of unique strings Columns are ['A', 'B', 'C', 'D']. - - A B C D - GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993 - KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588 - VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731 - kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607 - CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266 - 0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541 - tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710 - ... ... ... ... ... - 7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237 - 4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612 - B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653 - hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427 - 1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827 - 9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204 - xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502 - - [30 rows x 4 columns] """ - df = DataFrame(tm.getSeriesData()) - df.A = df.A.astype("float32") - df.B = df.B.astype("float32") - df.C = df.C.astype("float16") - df.D = df.D.astype("float64") + df = DataFrame( + { + col: np.random.default_rng(2).random(30, dtype=dtype) + for col, dtype in zip( + list("ABCD"), ["float32", "float32", "float32", "float64"] + ) + }, + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + ) + # not supported by numpy random + df["C"] = df["C"].astype("float16") return df @@ -109,32 +82,14 @@ def mixed_int_frame(): Fixture for DataFrame of different int types with index of unique strings Columns are ['A', 'B', 'C', 'D']. - - A B C D - mUrCZ67juP 0 1 2 2 - rw99ACYaKS 0 1 0 0 - 7QsEcpaaVU 0 1 1 1 - xkrimI2pcE 0 1 0 0 - dz01SuzoS8 0 1 255 255 - ccQkqOHX75 -1 1 0 0 - DN0iXaoDLd 0 1 0 0 - ... .. .. ... ... - Dfb141wAaQ 1 1 254 254 - IPD8eQOVu5 0 1 0 0 - CcaKulsCmv 0 1 0 0 - rIBa8gu7E5 0 1 0 0 - RP6peZmh5o 0 1 1 1 - NMb9pipQWQ 0 1 0 0 - PqgbJEzjib 0 1 3 3 - - [30 rows x 4 columns] """ - df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()}) - df.A = df.A.astype("int32") - df.B = np.ones(len(df.B), dtype="uint64") - df.C = df.C.astype("uint8") - df.D = df.C.astype("int64") - return df + return DataFrame( + { + col: np.ones(30, dtype=dtype) + for col, dtype in zip(list("ABCD"), ["int32", "uint64", "uint8", "int64"]) + }, + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + ) @pytest.fixture diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index be809e3a17c8e..e09e9f5624f5b 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -57,7 +57,7 @@ def test_xs( assert xs["B"] == "1" with pytest.raises( - KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')") + KeyError, match=re.escape("Timestamp('2019-12-31 00:00:00')") ): datetime_frame.xs(datetime_frame.index[0] - BDay()) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 23355a5549a88..0014ccf054653 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -30,7 +30,7 @@ def test_first_subset(self, frame_or_series): with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("3ME") - expected = ts[:"3/31/2000"] + expected = ts[:"3/31/2020"] tm.assert_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): @@ -78,7 +78,7 @@ def test_last_subset(self, frame_or_series): with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): result = ts.last("21D") - expected = ts["2000-01-10":] + expected = ts["2020-01-10":] tm.assert_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index 7d9e0fe90f44c..fcb7677f03f27 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -532,11 +532,11 @@ def test_info_compute_numba(): with option_context("compute.use_numba", True): buf = StringIO() - df.info() + df.info(buf=buf) result = buf.getvalue() buf = StringIO() - df.info() + df.info(buf=buf) expected = buf.getvalue() assert result == expected diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py index 4c4b04076c8d5..f348b8e823eff 100644 --- a/pandas/tests/frame/methods/test_truncate.py +++ b/pandas/tests/frame/methods/test_truncate.py @@ -60,7 +60,7 @@ def test_truncate(self, datetime_frame, frame_or_series): truncated = ts.truncate(before=ts.index[-1] + ts.index.freq) assert len(truncated) == 0 - msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00" + msg = "Truncate: 2020-01-06 00:00:00 must be after 2020-02-04 00:00:00" with pytest.raises(ValueError, match=msg): ts.truncate( before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 1ca9ec6feecae..b079c331eeebb 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -156,36 +156,18 @@ def bool_frame_with_na(): Fixture for DataFrame of booleans with index of unique strings Columns are ['A', 'B', 'C', 'D']; some entries are missing - - A B C D - zBZxY2IDGd False False False False - IhBWBMWllt False True True True - ctjdvZSR6R True False True True - AVTujptmxb False True False True - G9lrImrSWq False False False True - sFFwdIUfz2 NaN NaN NaN NaN - s15ptEJnRb NaN NaN NaN NaN - ... ... ... ... ... - UW41KkDyZ4 True True False False - l9l6XkOdqV True False False False - X2MeZfzDYA False True False False - xWkIKU7vfX False True False True - QOhL6VmpGU False False False True - 22PwkRJdat False True False False - kfboQ3VeIK True False True False - - [30 rows x 4 columns] """ - df = DataFrame(tm.getSeriesData()) > 0 - df = df.astype(object) + df = DataFrame( + np.concatenate( + [np.ones((15, 4), dtype=bool), np.zeros((15, 4), dtype=bool)], axis=0 + ), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + dtype=object, + ) # set some NAs df.iloc[5:10] = np.nan df.iloc[15:20, -2:] = np.nan - - # For `any` tests we need to have at least one True before the first NaN - # in each column - for i in range(4): - df.iloc[i, i] = True return df @@ -195,27 +177,12 @@ def float_frame_with_na(): Fixture for DataFrame of floats with index of unique strings Columns are ['A', 'B', 'C', 'D']; some entries are missing - - A B C D - ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997 - DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872 - neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522 - 0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018 - 3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826 - soujjZ0A08 NaN NaN NaN NaN - 7W6NLGsjB9 NaN NaN NaN NaN - ... ... ... ... ... - uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590 - n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717 - ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189 - uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503 - 3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947 - 2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083 - sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517 - - [30 rows x 4 columns] """ - df = DataFrame(tm.getSeriesData()) + df = DataFrame( + np.random.default_rng(2).standard_normal((30, 4)), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) # set some NAs df.iloc[5:10] = np.nan df.iloc[15:20, -2:] = np.nan diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5275050391ca3..37bc2812a2095 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -90,15 +90,14 @@ def assert_json_roundtrip_equal(result, expected, orient): class TestPandasContainer: @pytest.fixture def categorical_frame(self): - _seriesd = tm.getSeriesData() - - _cat_frame = DataFrame(_seriesd) - - cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15) - _cat_frame.index = pd.CategoricalIndex(cat, name="E") - _cat_frame["E"] = list(reversed(cat)) - _cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64") - return _cat_frame + data = { + c: np.random.default_rng(i).standard_normal(30) + for i, c in enumerate(list("ABCD")) + } + cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * 15 + data["E"] = list(reversed(cat)) + data["sort"] = np.arange(30, dtype="int64") + return DataFrame(data, index=pd.CategoricalIndex(cat, name="E")) @pytest.fixture def datetime_series(self): From 8550304fcb3967ed52f9908349f0afb21b455cea Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 29 Nov 2023 12:25:00 -0800 Subject: [PATCH 5/6] use 2000 instead of 2020 --- pandas/_testing/__init__.py | 2 +- pandas/tests/frame/indexing/test_xs.py | 2 +- pandas/tests/frame/methods/test_first_and_last.py | 4 ++-- pandas/tests/frame/methods/test_truncate.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 6000c828555c6..ead00cd778d7b 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -350,7 +350,7 @@ def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series: nper = _N return Series( np.random.default_rng(2).standard_normal(nper), - index=date_range("2020-01-01", periods=nper, freq=freq), + index=date_range("2000-01-01", periods=nper, freq=freq), name=name, ) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index e09e9f5624f5b..be809e3a17c8e 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -57,7 +57,7 @@ def test_xs( assert xs["B"] == "1" with pytest.raises( - KeyError, match=re.escape("Timestamp('2019-12-31 00:00:00')") + KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')") ): datetime_frame.xs(datetime_frame.index[0] - BDay()) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 0014ccf054653..23355a5549a88 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -30,7 +30,7 @@ def test_first_subset(self, frame_or_series): with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("3ME") - expected = ts[:"3/31/2020"] + expected = ts[:"3/31/2000"] tm.assert_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): @@ -78,7 +78,7 @@ def test_last_subset(self, frame_or_series): with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): result = ts.last("21D") - expected = ts["2020-01-10":] + expected = ts["2000-01-10":] tm.assert_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py index f348b8e823eff..4c4b04076c8d5 100644 --- a/pandas/tests/frame/methods/test_truncate.py +++ b/pandas/tests/frame/methods/test_truncate.py @@ -60,7 +60,7 @@ def test_truncate(self, datetime_frame, frame_or_series): truncated = ts.truncate(before=ts.index[-1] + ts.index.freq) assert len(truncated) == 0 - msg = "Truncate: 2020-01-06 00:00:00 must be after 2020-02-04 00:00:00" + msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00" with pytest.raises(ValueError, match=msg): ts.truncate( before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq From 3a194ec3e32e43fac3f2af213714244ed3cc896a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:21:42 -0800 Subject: [PATCH 6/6] Just use ones DF --- pandas/tests/apply/test_numba.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py index 85d7baee1bdf5..57b81711ddb48 100644 --- a/pandas/tests/apply/test_numba.py +++ b/pandas/tests/apply/test_numba.py @@ -60,9 +60,10 @@ def test_numba_vs_python_indexing(): "reduction", [lambda x: x.mean(), lambda x: x.min(), lambda x: x.max(), lambda x: x.sum()], ) -def test_numba_vs_python_reductions(float_frame, reduction, apply_axis): - result = float_frame.apply(reduction, engine="numba", axis=apply_axis) - expected = float_frame.apply(reduction, engine="python", axis=apply_axis) +def test_numba_vs_python_reductions(reduction, apply_axis): + df = DataFrame(np.ones((4, 4), dtype=np.float64)) + result = df.apply(reduction, engine="numba", axis=apply_axis) + expected = df.apply(reduction, engine="python", axis=apply_axis) tm.assert_series_equal(result, expected)