From b48957365eef30a80435249cc79e3299f717236e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 7 Feb 2022 10:59:20 -0800 Subject: [PATCH] TST: Clean test_pickle --- pandas/tests/io/test_pickle.py | 167 +++++++++++++-------------------- 1 file changed, 66 insertions(+), 101 deletions(-) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 4700e307f2407..56fc42aa0e2db 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -73,14 +73,13 @@ def current_pickle_data(): # --------------------- # comparison functions # --------------------- -def compare_element(result, expected, typ, version=None): +def compare_element(result, expected, typ): if isinstance(expected, Index): tm.assert_index_equal(expected, result) return if typ.startswith("sp_"): - comparator = tm.assert_equal - comparator(result, expected) + tm.assert_equal(result, expected) elif typ == "timestamp": if expected is pd.NaT: assert result is pd.NaT @@ -92,74 +91,7 @@ def compare_element(result, expected, typ, version=None): comparator(result, expected) -def compare(data, vf, version): - - data = pd.read_pickle(vf) - - m = globals() - for typ, dv in data.items(): - for dt, result in dv.items(): - expected = data[typ][dt] - - # use a specific comparator - # if available - comparator = f"compare_{typ}_{dt}" - - comparator = m.get(comparator, m["compare_element"]) - comparator(result, expected, typ, version) - return data - - -def compare_series_ts(result, expected, typ, version): - # GH 7748 - tm.assert_series_equal(result, expected) - assert result.index.freq == expected.index.freq - assert not result.index.freq.normalize - tm.assert_series_equal(result > 0, expected > 0) - - # GH 9291 - freq = result.index.freq - assert freq + Day(1) == Day(2) - - res = freq + pd.Timedelta(hours=1) - assert isinstance(res, pd.Timedelta) - assert res == pd.Timedelta(days=1, hours=1) - - res = freq + pd.Timedelta(nanoseconds=1) - assert isinstance(res, pd.Timedelta) - assert res == pd.Timedelta(days=1, nanoseconds=1) - - -def compare_series_dt_tz(result, expected, typ, version): - tm.assert_series_equal(result, expected) - - -def compare_series_cat(result, expected, typ, version): - tm.assert_series_equal(result, expected) - - -def compare_frame_dt_mixed_tzs(result, expected, typ, version): - tm.assert_frame_equal(result, expected) - - -def compare_frame_cat_onecol(result, expected, typ, version): - tm.assert_frame_equal(result, expected) - - -def compare_frame_cat_and_float(result, expected, typ, version): - compare_frame_cat_onecol(result, expected, typ, version) - - -def compare_index_period(result, expected, typ, version): - tm.assert_index_equal(result, expected) - assert isinstance(result.freq, MonthEnd) - assert result.freq == MonthEnd() - assert result.freqstr == "M" - tm.assert_index_equal(result.shift(2), expected.shift(2)) - - -here = os.path.dirname(__file__) -legacy_dirname = os.path.join(here, "data", "legacy_pickle") +legacy_dirname = os.path.join(os.path.dirname(__file__), "data", "legacy_pickle") files = glob.glob(os.path.join(legacy_dirname, "*", "*.pickle")) @@ -171,14 +103,53 @@ def legacy_pickle(request, datapath): # --------------------- # tests # --------------------- -def test_pickles(current_pickle_data, legacy_pickle): +def test_pickles(legacy_pickle): if not is_platform_little_endian(): pytest.skip("known failure on non-little endian") - version = os.path.basename(os.path.dirname(legacy_pickle)) with catch_warnings(record=True): simplefilter("ignore") - compare(current_pickle_data, legacy_pickle, version) + + data = pd.read_pickle(legacy_pickle) + + for typ, dv in data.items(): + for dt, result in dv.items(): + expected = data[typ][dt] + + if typ == "series" and dt == "ts": + # GH 7748 + tm.assert_series_equal(result, expected) + assert result.index.freq == expected.index.freq + assert not result.index.freq.normalize + tm.assert_series_equal(result > 0, expected > 0) + + # GH 9291 + freq = result.index.freq + assert freq + Day(1) == Day(2) + + res = freq + pd.Timedelta(hours=1) + assert isinstance(res, pd.Timedelta) + assert res == pd.Timedelta(days=1, hours=1) + + res = freq + pd.Timedelta(nanoseconds=1) + assert isinstance(res, pd.Timedelta) + assert res == pd.Timedelta(days=1, nanoseconds=1) + elif typ == "index" and dt == "period": + tm.assert_index_equal(result, expected) + assert isinstance(result.freq, MonthEnd) + assert result.freq == MonthEnd() + assert result.freqstr == "M" + tm.assert_index_equal(result.shift(2), expected.shift(2)) + elif typ == "series" and dt in ("dt_tz", "cat"): + tm.assert_series_equal(result, expected) + elif typ == "frame" and dt in ( + "dt_mixed_tzs", + "cat_onecol", + "cat_and_float", + ): + tm.assert_frame_equal(result, expected) + else: + compare_element(result, expected, typ) def python_pickler(obj, path): @@ -208,32 +179,32 @@ def python_unpickler(path): ), ], ) +@pytest.mark.parametrize("writer", [pd.to_pickle, python_pickler]) @pytest.mark.filterwarnings("ignore:The 'freq' argument in Timestamp:FutureWarning") -def test_round_trip_current(current_pickle_data, pickle_writer): +def test_round_trip_current(current_pickle_data, pickle_writer, writer): data = current_pickle_data for typ, dv in data.items(): for dt, expected in dv.items(): - for writer in [pd.to_pickle, python_pickler]: - with tm.ensure_clean() as path: - # test writing with each pickler - pickle_writer(expected, path) + with tm.ensure_clean() as path: + # test writing with each pickler + pickle_writer(expected, path) - # test reading with each unpickler - result = pd.read_pickle(path) - compare_element(result, expected, typ) + # test reading with each unpickler + result = pd.read_pickle(path) + compare_element(result, expected, typ) - result = python_unpickler(path) - compare_element(result, expected, typ) + result = python_unpickler(path) + compare_element(result, expected, typ) - # and the same for file objects (GH 35679) - with open(path, mode="wb") as handle: - writer(expected, path) - handle.seek(0) # shouldn't close file handle - with open(path, mode="rb") as handle: - result = pd.read_pickle(handle) - handle.seek(0) # shouldn't close file handle - compare_element(result, expected, typ) + # and the same for file objects (GH 35679) + with open(path, mode="wb") as handle: + writer(expected, path) + handle.seek(0) # shouldn't close file handle + with open(path, mode="rb") as handle: + result = pd.read_pickle(handle) + handle.seek(0) # shouldn't close file handle + compare_element(result, expected, typ) def test_pickle_path_pathlib(): @@ -248,7 +219,8 @@ def test_pickle_path_localpath(): tm.assert_frame_equal(df, result) -def test_legacy_sparse_warning(datapath): +@pytest.mark.parametrize("typ", ["sparseseries", "sparseframe"]) +def test_legacy_sparse_warning(datapath, typ): """ Generated with @@ -264,14 +236,7 @@ def test_legacy_sparse_warning(datapath): with tm.assert_produces_warning(FutureWarning): simplefilter("ignore", DeprecationWarning) # from boto pd.read_pickle( - datapath("io", "data", "pickle", "sparseseries-0.20.3.pickle.gz"), - compression="gzip", - ) - - with tm.assert_produces_warning(FutureWarning): - simplefilter("ignore", DeprecationWarning) # from boto - pd.read_pickle( - datapath("io", "data", "pickle", "sparseframe-0.20.3.pickle.gz"), + datapath("io", "data", "pickle", f"{typ}-0.20.3.pickle.gz"), compression="gzip", )