diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c5ac2a800223b..a6d9e079b26fe 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -85,7 +85,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ - 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`) -- +- pickled objects from pandas version less than ``1.0.0`` or Python versions less than or equal to ``2.7`` are no longer supported (:issue:`57155`) .. --------------------------------------------------------------------------- .. _whatsnew_300.deprecations: diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index cd5e6e521b79f..7a08e4ad4b260 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -76,7 +76,7 @@ cdef _nat_rdivide_op(self, other): return NotImplemented -def __nat_unpickle(*args): +def _nat_unpickle(*args): # return constant defined in the module return c_NaT @@ -360,7 +360,7 @@ class NaTType(_NaT): return self.__reduce__() def __reduce__(self): - return (__nat_unpickle, (None, )) + return (_nat_unpickle, (None, )) def __rtruediv__(self, other): return _nat_rdivide_op(self, other) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index ff589ebba4cf6..012a53789d76a 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -1,12 +1,12 @@ """ -Support pre-0.12 series pickle compatibility. 
+Pickle compatibility back to pandas version 1.0 """ from __future__ import annotations import contextlib -import copy +import copyreg import io -import pickle as pkl +import pickle from typing import ( TYPE_CHECKING, Any, @@ -17,7 +17,6 @@ from pandas._libs.arrays import NDArrayBacked from pandas._libs.tslibs import BaseOffset -from pandas import Index from pandas.core.arrays import ( DatetimeArray, PeriodArray, @@ -29,111 +28,19 @@ from collections.abc import Generator -def load_reduce(self) -> None: - stack = self.stack - args = stack.pop() - func = stack[-1] - - try: - stack[-1] = func(*args) - return - except TypeError as err: - # If we have a deprecated function, - # try to replace and try again. - - msg = "_reconstruct: First argument must be a sub-type of ndarray" - - if msg in str(err): - try: - cls = args[0] - stack[-1] = object.__new__(cls) - return - except TypeError: - pass - elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset): - # TypeError: object.__new__(Day) is not safe, use Day.__new__() - cls = args[0] - stack[-1] = cls.__new__(*args) - return - elif args and issubclass(args[0], PeriodArray): - cls = args[0] - stack[-1] = NDArrayBacked.__new__(*args) - return - - raise - - # If classes are moved, provide compat here. 
_class_locations_map = { - ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"), - # 15477 - ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"), # Re-routing unpickle block logic to go through _unpickle_block instead # for pandas <= 1.3.5 ("pandas.core.internals.blocks", "new_block"): ( "pandas._libs.internals", "_unpickle_block", ), - ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"), - ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"), - # 10890 - ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"), - ("pandas.sparse.series", "SparseTimeSeries"): ( - "pandas.core.sparse.series", - "SparseSeries", - ), - # 12588, extensions moving - ("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"), - ("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"), - # 18543 moving period - ("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"), - ("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"), - # 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype - ("pandas.tslib", "__nat_unpickle"): ( - "pandas._libs.tslibs.nattype", - "__nat_unpickle", - ), - ("pandas._libs.tslib", "__nat_unpickle"): ( + ("pandas._libs.tslibs.nattype", "__nat_unpickle"): ( "pandas._libs.tslibs.nattype", - "__nat_unpickle", - ), - # 15998 top-level dirs moving - ("pandas.sparse.array", "SparseArray"): ( - "pandas.core.arrays.sparse", - "SparseArray", + "_nat_unpickle", ), - ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"), - ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"), - ("pandas.indexes.numeric", "Int64Index"): ( - "pandas.core.indexes.base", - "Index", # updated in 50775 - ), - ("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"), - ("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"), - 
("pandas.tseries.index", "_new_DatetimeIndex"): ( - "pandas.core.indexes.datetimes", - "_new_DatetimeIndex", - ), - ("pandas.tseries.index", "DatetimeIndex"): ( - "pandas.core.indexes.datetimes", - "DatetimeIndex", - ), - ("pandas.tseries.period", "PeriodIndex"): ( - "pandas.core.indexes.period", - "PeriodIndex", - ), - # 19269, arrays moving - ("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"), - # 19939, add timedeltaindex, float64index compat from 15998 move - ("pandas.tseries.tdi", "TimedeltaIndex"): ( - "pandas.core.indexes.timedeltas", - "TimedeltaIndex", - ), - ("pandas.indexes.numeric", "Float64Index"): ( - "pandas.core.indexes.base", - "Index", # updated in 50775 - ), - # 50775, remove Int64Index, UInt64Index & Float64Index from codabase + # 50775, remove Int64Index, UInt64Index & Float64Index from codebase ("pandas.core.indexes.numeric", "Int64Index"): ( "pandas.core.indexes.base", "Index", @@ -153,20 +60,29 @@ def load_reduce(self) -> None: } -# our Unpickler sub-class to override methods and some dispatcher -# functions for compat and uses a non-public class of the pickle module. - +def load_reduce(self): + stack = self.stack + args = stack.pop() + func = stack[-1] -class Unpickler(pkl._Unpickler): - def find_class(self, module, name): - # override superclass - key = (module, name) - module, name = _class_locations_map.get(key, key) - return super().find_class(module, name) + try: + stack[-1] = func(*args) + return + except TypeError: + # If we have a deprecated function, + # try to replace and try again. 
+ if args and isinstance(args[0], type) and issubclass(args[0], BaseOffset): + # TypeError: object.__new__(Day) is not safe, use Day.__new__() + cls = args[0] + stack[-1] = cls.__new__(*args) + return + elif args and issubclass(args[0], PeriodArray): + cls = args[0] + stack[-1] = NDArrayBacked.__new__(*args) + return -Unpickler.dispatch = copy.copy(Unpickler.dispatch) -Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce + raise def load_newobj(self) -> None: @@ -174,9 +90,7 @@ def load_newobj(self) -> None: cls = self.stack[-1] # compat - if issubclass(cls, Index): - obj = object.__new__(cls) - elif issubclass(cls, DatetimeArray) and not args: + if issubclass(cls, DatetimeArray) and not args: arr = np.array([], dtype="M8[ns]") obj = cls.__new__(cls, arr, arr.dtype) elif issubclass(cls, TimedeltaArray) and not args: @@ -190,29 +104,18 @@ def load_newobj(self) -> None: self.stack[-1] = obj -Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj - - -def load_newobj_ex(self) -> None: - kwargs = self.stack.pop() - args = self.stack.pop() - cls = self.stack.pop() - - # compat - if issubclass(cls, Index): - obj = object.__new__(cls) - else: - obj = cls.__new__(cls, *args, **kwargs) - self.append(obj) - +class Unpickler(pickle.Unpickler): + dispatch_table = copyreg.dispatch_table.copy() + dispatch_table[pickle.REDUCE[0]] = load_reduce + dispatch_table[pickle.NEWOBJ[0]] = load_newobj -try: - Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex -except (AttributeError, KeyError): - pass + def find_class(self, module, name): + key = (module, name) + module, name = _class_locations_map.get(key, key) + return super().find_class(module, name) -def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any: +def load(fh, encoding: str | None = None) -> Any: """ Load a pickle, with a provided encoding, @@ -220,20 +123,13 @@ def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any: ---------- fh : a filelike object encoding : an optional encoding - is_verbose : 
show exception output """ - try: - fh.seek(0) - if encoding is not None: - up = Unpickler(fh, encoding=encoding) - else: - up = Unpickler(fh) - # "Unpickler" has no attribute "is_verbose" [attr-defined] - up.is_verbose = is_verbose # type: ignore[attr-defined] - - return up.load() - except (ValueError, TypeError): - raise + fh.seek(0) + if encoding is not None: + up = Unpickler(fh, encoding=encoding) + else: + up = Unpickler(fh) + return up.load() def loads( @@ -257,9 +153,9 @@ def patch_pickle() -> Generator[None, None, None]: """ Temporarily patch pickle to use our unpickler. """ - orig_loads = pkl.loads + orig_loads = pickle.loads try: - setattr(pkl, "loads", loads) + setattr(pickle, "loads", loads) yield finally: - setattr(pkl, "loads", orig_loads) + setattr(pickle, "loads", orig_loads) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index fafeedc01b02b..67be5c7fa0853 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1375,23 +1375,6 @@ def _where(self, mask, value): result = type(self)._from_sequence(naive_implementation, dtype=dtype) return result - # ------------------------------------------------------------------------ - # IO - # ------------------------------------------------------------------------ - def __setstate__(self, state) -> None: - """Necessary for making this object picklable""" - if isinstance(state, tuple): - # Compat for pandas < 0.24.0 - nd_state, (fill_value, sp_index) = state - sparse_values = np.array([]) - sparse_values.__setstate__(nd_state) - - self._sparse_values = sparse_values - self._sparse_index = sp_index - self._dtype = SparseDtype(sparse_values.dtype, fill_value) - else: - self.__dict__.update(state) - def nonzero(self) -> tuple[npt.NDArray[np.int32]]: if self.fill_value == 0: return (self.sp_index.indices,) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c6a8b61e0c51e..60285f987cce1 100644 --- 
a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1930,11 +1930,6 @@ def unpickle_block(values, mgr_locs, ndim: int) -> Block: else: raise NotImplementedError("pre-0.14.1 pickles are no longer supported") - self._post_setstate() - - def _post_setstate(self) -> None: - pass - @cache_readonly def _block(self) -> Block: return self.blocks[0] diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 89867ab4f19d0..54facd89dc5d9 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -156,7 +156,7 @@ def read_pickle( Notes ----- - read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3 + read_pickle is only guaranteed to be backwards compatible to pandas 1.0.0 provided the object was serialized with to_pickle. Examples diff --git a/pandas/tests/io/data/legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 b/pandas/tests/io/data/legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 deleted file mode 100644 index 6fb92d3c564bd..0000000000000 Binary files a/pandas/tests/io/data/legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 and /dev/null differ diff --git a/pandas/tests/io/data/pickle/categorical.0.25.0.pickle b/pandas/tests/io/data/pickle/categorical.0.25.0.pickle deleted file mode 100644 index b756060c83d94..0000000000000 Binary files a/pandas/tests/io/data/pickle/categorical.0.25.0.pickle and /dev/null differ diff --git a/pandas/tests/io/data/pickle/test_mi_py27.pkl b/pandas/tests/io/data/pickle/test_mi_py27.pkl deleted file mode 100644 index 89021dd828108..0000000000000 Binary files a/pandas/tests/io/data/pickle/test_mi_py27.pkl and /dev/null differ diff --git a/pandas/tests/io/data/pickle/test_py27.pkl b/pandas/tests/io/data/pickle/test_py27.pkl deleted file mode 100644 index 5308b864bc0c7..0000000000000 Binary files a/pandas/tests/io/data/pickle/test_py27.pkl and /dev/null differ diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index c8563ee4af4a8..0fd0a8774bbf4 
100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -348,34 +348,6 @@ def test_read_hdf_series_mode_r(tmp_path, format, setup_path): tm.assert_series_equal(result, series) -@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning") -@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") -def test_read_py2_hdf_file_in_py3(datapath): - # GH 16781 - - # tests reading a PeriodIndex DataFrame written in Python2 in Python3 - - # the file was generated in Python 2.7 like so: - # - # df = DataFrame([1.,2,3], index=pd.PeriodIndex( - # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) - # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p') - - expected = DataFrame( - [1.0, 2, 3], - index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"), - ) - - with ensure_clean_store( - datapath( - "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5" - ), - mode="r", - ) as store: - result = store["p"] - tm.assert_frame_equal(result, expected) - - def test_read_infer_string(tmp_path, setup_path): # GH#54431 pytest.importorskip("pyarrow") diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b43f430b8895b..41800dc4d7256 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -297,7 +297,13 @@ def test_read_expands_user_home_dir( ( pd.read_pickle, "os", - ("io", "data", "pickle", "categorical.0.25.0.pickle"), + ( + "io", + "data", + "legacy_pickle", + "1.4.2", + "1.4.2_x86_64_linux_3.9.7.pickle", + ), ), ], ) @@ -310,10 +316,9 @@ def test_read_fspath_all(self, reader, module, path, datapath): expected = reader(path) if path.endswith(".pickle"): - # categorical - tm.assert_categorical_equal(result, expected) - else: - tm.assert_frame_equal(result, expected) + result = result["frame"]["float"] + expected = expected["frame"]["float"] + tm.assert_frame_equal(result, expected) 
@pytest.mark.parametrize( "writer_name, writer_kwargs, module", diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 57c7829924531..5b8fddebd4ffa 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -399,31 +399,6 @@ def test_read(self, protocol, get_random_path): tm.assert_frame_equal(df, df2) -@pytest.mark.parametrize( - ["pickle_file", "excols"], - [ - ("test_py27.pkl", Index(["a", "b", "c"])), - ( - "test_mi_py27.pkl", - pd.MultiIndex.from_arrays([["a", "b", "c"], ["A", "B", "C"]]), - ), - ], -) -def test_unicode_decode_error(datapath, pickle_file, excols): - # pickle file written with py27, should be readable without raising - # UnicodeDecodeError, see GH#28645 and GH#31988 - path = datapath("io", "data", "pickle", pickle_file) - df = pd.read_pickle(path) - - # just test the columns are correct since the values are random - tm.assert_index_equal(df.columns, excols) - - -# --------------------- -# tests for buffer I/O -# --------------------- - - def test_pickle_buffer_roundtrip(): with tm.ensure_clean() as path: df = DataFrame(