diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c7ddd10895efb..650185cc805b8 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -495,7 +495,7 @@ Strings Interval ^^^^^^^^ - :meth:`pd.IntervalIndex.get_indexer` and :meth:`pd.IntervalIndex.get_indexer_nonunique` raising if ``target`` is read-only array (:issue:`53703`) -- +- Bug in :class:`IntervalDtype` where the object could be kept alive when deleted (:issue:`54184`) Indexing ^^^^^^^^ @@ -539,6 +539,7 @@ Period - Bug in :class:`PeriodDtype` constructor failing to raise ``TypeError`` when no argument is passed or when ``None`` is passed (:issue:`27388`) - Bug in :class:`PeriodDtype` constructor incorrectly returning the same ``normalize`` for different :class:`DateOffset` ``freq`` inputs (:issue:`24121`) - Bug in :class:`PeriodDtype` constructor raising ``ValueError`` instead of ``TypeError`` when an invalid type is passed (:issue:`51790`) +- Bug in :class:`PeriodDtype` where the object could be kept alive when deleted (:issue:`54184`) - Bug in :func:`read_csv` not processing empty strings as a null value, with ``engine="pyarrow"`` (:issue:`52087`) - Bug in :func:`read_csv` returning ``object`` dtype columns instead of ``float64`` dtype columns with ``engine="pyarrow"`` for columns that are all null with ``engine="pyarrow"`` (:issue:`52087`) - Bug in :meth:`Period.now` not accepting the ``freq`` parameter as a keyword argument (:issue:`53369`) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0b2029cafb565..5d3d5a1a6b344 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -950,7 +950,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype): # error: Incompatible types in assignment (expression has type # "Dict[int, PandasExtensionDtype]", base class "PandasExtensionDtype" # defined the type as "Dict[str, PandasExtensionDtype]") [assignment] - _cache_dtypes: dict[BaseOffset, PeriodDtype] = {} # type: ignore[assignment] # noqa: E501 + _cache_dtypes: dict[BaseOffset, int] = {} # type: ignore[assignment] __hash__ = PeriodDtypeBase.__hash__ _freq: BaseOffset @@ -967,13 +967,13 @@ def __new__(cls, freq): freq = cls._parse_dtype_strict(freq) try: - return cls._cache_dtypes[freq] + dtype_code = cls._cache_dtypes[freq] except KeyError: dtype_code = freq._period_dtype_code - u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n) - u._freq = freq - cls._cache_dtypes[freq] = u - return u + cls._cache_dtypes[freq] = dtype_code + u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n) + u._freq = freq + return u def __reduce__(self): return type(self), (self.name,) @@ -1154,8 +1154,10 @@ class IntervalDtype(PandasExtensionDtype): ) _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} + _subtype: None | np.dtype + _closed: IntervalClosedType | None - def __new__(cls, subtype=None, closed: IntervalClosedType | None = None): + def __init__(self, subtype=None, closed: IntervalClosedType | None = None) -> None: from pandas.core.dtypes.common import ( is_string_dtype, pandas_dtype, @@ -1170,19 +1172,19 @@ def __new__(cls, subtype=None, closed: IntervalClosedType | None = None): "dtype.closed and 'closed' do not match. " "Try IntervalDtype(dtype.subtype, closed) instead." ) - return subtype + self._subtype = subtype._subtype + self._closed = subtype._closed elif subtype is None: # we are called as an empty constructor # generally for pickle compat - u = object.__new__(cls) - u._subtype = None - u._closed = closed - return u + self._subtype = None + self._closed = closed elif isinstance(subtype, str) and subtype.lower() == "interval": - subtype = None + self._subtype = None + self._closed = closed else: if isinstance(subtype, str): - m = cls._match.search(subtype) + m = IntervalDtype._match.search(subtype) if m is not None: gd = m.groupdict() subtype = gd["subtype"] @@ -1199,24 +1201,15 @@ def __new__(cls, subtype=None, closed: IntervalClosedType | None = None): subtype = pandas_dtype(subtype) except TypeError as err: raise TypeError("could not construct IntervalDtype") from err - - if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): - # GH 19016 - msg = ( - "category, object, and string subtypes are not supported " - "for IntervalDtype" - ) - raise TypeError(msg) - - key = f"{subtype}{closed}" - try: - return cls._cache_dtypes[key] - except KeyError: - u = object.__new__(cls) - u._subtype = subtype - u._closed = closed - cls._cache_dtypes[key] = u - return u + if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalDtype" + ) + raise TypeError(msg) + self._subtype = subtype + self._closed = closed @cache_readonly def _can_hold_na(self) -> bool: @@ -1232,7 +1225,7 @@ def _can_hold_na(self) -> bool: @property def closed(self) -> IntervalClosedType: - return self._closed + return self._closed # type: ignore[return-value] @property def subtype(self): diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index aefed29a490ca..d0a34cedb7dbe 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -100,7 +100,7 @@ def test_categorical_dtype(self): ], ) def test_period_dtype(self, dtype): - assert com.pandas_dtype(dtype) is PeriodDtype(dtype) + assert com.pandas_dtype(dtype) is not PeriodDtype(dtype) assert com.pandas_dtype(dtype) == PeriodDtype(dtype) assert com.pandas_dtype(dtype) == dtype diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 07a4e06db2e74..cdb33db540743 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1,4 +1,5 @@ import re +import weakref import numpy as np import pytest @@ -412,9 +413,9 @@ def test_hash_vs_equality(self, dtype): assert dtype == dtype2 assert dtype2 == dtype assert dtype3 == dtype - assert dtype is dtype2 - assert dtype2 is dtype - assert dtype3 is dtype + assert dtype is not dtype2 + assert dtype2 is not dtype + assert dtype3 is not dtype assert hash(dtype) == hash(dtype2) assert hash(dtype) == hash(dtype3) @@ -458,13 +459,13 @@ def test_subclass(self): def test_identity(self): assert PeriodDtype("period[D]") == PeriodDtype("period[D]") - assert PeriodDtype("period[D]") is PeriodDtype("period[D]") + assert PeriodDtype("period[D]") is not PeriodDtype("period[D]") assert PeriodDtype("period[3D]") == PeriodDtype("period[3D]") - assert PeriodDtype("period[3D]") is PeriodDtype("period[3D]") + assert PeriodDtype("period[3D]") is not PeriodDtype("period[3D]") assert PeriodDtype("period[1S1U]") == PeriodDtype("period[1000001U]") - assert PeriodDtype("period[1S1U]") is PeriodDtype("period[1000001U]") + assert PeriodDtype("period[1S1U]") is not PeriodDtype("period[1000001U]") def test_compat(self, dtype): assert not is_datetime64_ns_dtype(dtype) @@ -565,6 +566,13 @@ def test_perioddtype_caching_dateoffset_normalize(self): per_d2 = PeriodDtype(pd.offsets.YearEnd(normalize=False)) assert not per_d2.freq.normalize + def test_dont_keep_ref_after_del(self): + # GH 54184 + dtype = PeriodDtype("D") + ref = weakref.ref(dtype) + del dtype + assert ref() is None + class TestIntervalDtype(Base): @pytest.fixture @@ -581,9 +589,9 @@ def test_hash_vs_equality(self, dtype): assert dtype == dtype2 assert dtype2 == dtype assert dtype3 == dtype - assert dtype is dtype2 - assert dtype2 is dtype3 - assert dtype3 is dtype + assert dtype is not dtype2 + assert dtype2 is not dtype3 + assert dtype3 is not dtype assert hash(dtype) == hash(dtype2) assert hash(dtype) == hash(dtype3) @@ -593,9 +601,9 @@ def test_hash_vs_equality(self, dtype): assert dtype2 == dtype1 assert dtype2 == dtype2 assert dtype2 == dtype3 - assert dtype2 is dtype1 + assert dtype2 is not dtype1 assert dtype2 is dtype2 - assert dtype2 is dtype3 + assert dtype2 is not dtype3 assert hash(dtype2) == hash(dtype1) assert hash(dtype2) == hash(dtype2) assert hash(dtype2) == hash(dtype3) @@ -833,12 +841,13 @@ def test_basic_dtype(self): assert not is_interval_dtype(np.float64) def test_caching(self): + # GH 54184: Caching not shown to improve performance IntervalDtype.reset_cache() dtype = IntervalDtype("int64", "right") - assert len(IntervalDtype._cache_dtypes) == 1 + assert len(IntervalDtype._cache_dtypes) == 0 IntervalDtype("interval") - assert len(IntervalDtype._cache_dtypes) == 2 + assert len(IntervalDtype._cache_dtypes) == 0 IntervalDtype.reset_cache() tm.round_trip_pickle(dtype) @@ -856,6 +865,13 @@ def test_unpickling_without_closed(self): tm.round_trip_pickle(dtype) + def test_dont_keep_ref_after_del(self): + # GH 54184 + dtype = IntervalDtype("int64", "right") + ref = weakref.ref(dtype) + del dtype + assert ref() is None + class TestCategoricalDtypeParametrized: @pytest.mark.parametrize(