From 43e03fd0d5061d683ed1f405b9997f9db05934f8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jul 2023 15:43:44 -0700 Subject: [PATCH 1/5] BUG: PeriodDtype and IntervalDtype holding references to itself when object is deleted --- doc/source/whatsnew/v2.1.0.rst | 3 +- pandas/core/dtypes/dtypes.py | 55 +++++++++++++----------------- pandas/tests/dtypes/test_dtypes.py | 39 ++++++++++++++------- 3 files changed, 51 insertions(+), 46 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 56bdd5df5a373..6d18675d53d11 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -493,7 +493,7 @@ Strings Interval ^^^^^^^^ - :meth:`pd.IntervalIndex.get_indexer` and :meth:`pd.IntervalIndex.get_indexer_nonunique` raising if ``target`` is read-only array (:issue:`53703`) -- +- Bug in :class:`IntervalDtype` where the object could be kept alive when deleted (:issue:`?`) Indexing ^^^^^^^^ @@ -541,6 +541,7 @@ Period - Bug in :meth:`Period.now` not accepting the ``freq`` parameter as a keyword argument (:issue:`53369`) - Bug in :meth:`arrays.PeriodArray.map` and :meth:`PeriodIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) - Bug in incorrectly allowing construction of :class:`Period` or :class:`PeriodDtype` with :class:`CustomBusinessDay` freq; use :class:`BusinessDay` instead (:issue:`52534`) +- Bug in :class:`PeriodDtype` where the object could be kept alive when deleted (:issue:`?`) Plotting ^^^^^^^^ diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0b2029cafb565..6254616c40591 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -950,7 +950,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype): # error: Incompatible types in assignment (expression has type # "Dict[int, PandasExtensionDtype]", base class "PandasExtensionDtype" # defined the type as "Dict[str, PandasExtensionDtype]") [assignment] - _cache_dtypes: dict[BaseOffset, PeriodDtype] = {} # type: ignore[assignment] # noqa: E501 + _cache_dtypes: dict[BaseOffset, int] = {} # type: ignore[assignment] __hash__ = PeriodDtypeBase.__hash__ _freq: BaseOffset @@ -967,13 +967,13 @@ def __new__(cls, freq): freq = cls._parse_dtype_strict(freq) try: - return cls._cache_dtypes[freq] + dtype_code = cls._cache_dtypes[freq] except KeyError: dtype_code = freq._period_dtype_code - u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n) - u._freq = freq - cls._cache_dtypes[freq] = u - return u + cls._cache_dtypes[freq] = dtype_code + u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n) + u._freq = freq + return u def __reduce__(self): return type(self), (self.name,) @@ -1155,7 +1155,7 @@ class IntervalDtype(PandasExtensionDtype): _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} - def __new__(cls, subtype=None, closed: IntervalClosedType | None = None): + def __init__(self, subtype=None, closed: IntervalClosedType | None = None): from pandas.core.dtypes.common import ( is_string_dtype, pandas_dtype, @@ -1170,19 +1170,19 @@ def __new__(cls, subtype=None, closed: IntervalClosedType | None = None): "dtype.closed and 'closed' do not match. " "Try IntervalDtype(dtype.subtype, closed) instead." ) - return subtype + self._subtype = subtype._subtype + self._closed = subtype._closed elif subtype is None: # we are called as an empty constructor # generally for pickle compat - u = object.__new__(cls) - u._subtype = None - u._closed = closed - return u + self._subtype = None + self._closed = closed elif isinstance(subtype, str) and subtype.lower() == "interval": - subtype = None + self._subtype = None + self._closed = closed else: if isinstance(subtype, str): - m = cls._match.search(subtype) + m = IntervalDtype._match.search(subtype) if m is not None: gd = m.groupdict() subtype = gd["subtype"] @@ -1197,27 +1197,18 @@ def __new__(cls, subtype=None, closed: IntervalClosedType | None = None): try: subtype = pandas_dtype(subtype) + if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalDtype" + ) + raise TypeError(msg) + self._subtype = subtype + self._closed = closed except TypeError as err: raise TypeError("could not construct IntervalDtype") from err - if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): - # GH 19016 - msg = ( - "category, object, and string subtypes are not supported " - "for IntervalDtype" - ) - raise TypeError(msg) - - key = f"{subtype}{closed}" - try: - return cls._cache_dtypes[key] - except KeyError: - u = object.__new__(cls) - u._subtype = subtype - u._closed = closed - cls._cache_dtypes[key] = u - return u - @cache_readonly def _can_hold_na(self) -> bool: subtype = self._subtype diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 07a4e06db2e74..c52252c6fdcae 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1,4 +1,5 @@ import re +import weakref import numpy as np import pytest @@ -412,9 +413,9 @@ def test_hash_vs_equality(self, dtype): assert dtype == dtype2 assert dtype2 == dtype assert dtype3 == dtype - assert dtype is dtype2 - assert dtype2 is dtype - assert dtype3 is dtype + assert dtype is not dtype2 + assert dtype2 is not dtype + assert dtype3 is not dtype assert hash(dtype) == hash(dtype2) assert hash(dtype) == hash(dtype3) @@ -458,13 +459,13 @@ def test_subclass(self): def test_identity(self): assert PeriodDtype("period[D]") == PeriodDtype("period[D]") - assert PeriodDtype("period[D]") is PeriodDtype("period[D]") + assert PeriodDtype("period[D]") is not PeriodDtype("period[D]") assert PeriodDtype("period[3D]") == PeriodDtype("period[3D]") - assert PeriodDtype("period[3D]") is PeriodDtype("period[3D]") + assert PeriodDtype("period[3D]") is not PeriodDtype("period[3D]") assert PeriodDtype("period[1S1U]") == PeriodDtype("period[1000001U]") - assert PeriodDtype("period[1S1U]") is PeriodDtype("period[1000001U]") + assert PeriodDtype("period[1S1U]") is not PeriodDtype("period[1000001U]") def test_compat(self, dtype): assert not is_datetime64_ns_dtype(dtype) @@ -565,6 +566,12 @@ def test_perioddtype_caching_dateoffset_normalize(self): per_d2 = PeriodDtype(pd.offsets.YearEnd(normalize=False)) assert not per_d2.freq.normalize + def test_dont_keep_ref_after_del(self): + dtype = PeriodDtype("D") + ref = weakref.ref(dtype) + del dtype + assert ref() is None + class TestIntervalDtype(Base): @pytest.fixture @@ -581,9 +588,9 @@ def test_hash_vs_equality(self, dtype): assert dtype == dtype2 assert dtype2 == dtype assert dtype3 == dtype - assert dtype is dtype2 - assert dtype2 is dtype3 - assert dtype3 is dtype + assert dtype is not dtype2 + assert dtype2 is not dtype3 + assert dtype3 is not dtype assert hash(dtype) == hash(dtype2) assert hash(dtype) == hash(dtype3) @@ -593,9 +600,9 @@ def test_hash_vs_equality(self, dtype): assert dtype2 == dtype1 assert dtype2 == dtype2 assert dtype2 == dtype3 - assert dtype2 is dtype1 + assert dtype2 is not dtype1 assert dtype2 is dtype2 - assert dtype2 is dtype3 + assert dtype2 is not dtype3 assert hash(dtype2) == hash(dtype1) assert hash(dtype2) == hash(dtype2) assert hash(dtype2) == hash(dtype3) @@ -835,10 +842,10 @@ def test_basic_dtype(self): def test_caching(self): IntervalDtype.reset_cache() dtype = IntervalDtype("int64", "right") - assert len(IntervalDtype._cache_dtypes) == 1 + assert len(IntervalDtype._cache_dtypes) == 0 IntervalDtype("interval") - assert len(IntervalDtype._cache_dtypes) == 2 + assert len(IntervalDtype._cache_dtypes) == 0 IntervalDtype.reset_cache() tm.round_trip_pickle(dtype) @@ -856,6 +863,12 @@ def test_unpickling_without_closed(self): tm.round_trip_pickle(dtype) + def test_dont_keep_ref_after_del(self): + dtype = IntervalDtype("int64", "right") + ref = weakref.ref(dtype) + del dtype + assert ref() is None + class TestCategoricalDtypeParametrized: @pytest.mark.parametrize( From ae958360c8a91f18b9aa68343232658cef2998a3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jul 2023 15:54:49 -0700 Subject: [PATCH 2/5] Add issue number --- doc/source/whatsnew/v2.1.0.rst | 4 ++-- pandas/tests/dtypes/test_dtypes.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6d18675d53d11..fef282b4134cb 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -493,7 +493,7 @@ Strings Interval ^^^^^^^^ - :meth:`pd.IntervalIndex.get_indexer` and :meth:`pd.IntervalIndex.get_indexer_nonunique` raising if ``target`` is read-only array (:issue:`53703`) -- Bug in :class:`IntervalDtype` where the object could be kept alive when deleted (:issue:`?`) +- Bug in :class:`IntervalDtype` where the object could be kept alive when deleted (:issue:`54184`) Indexing ^^^^^^^^ @@ -536,12 +536,12 @@ Period - Bug in :class:`PeriodDtype` constructor failing to raise ``TypeError`` when no argument is passed or when ``None`` is passed (:issue:`27388`) - Bug in :class:`PeriodDtype` constructor incorrectly returning the same ``normalize`` for different :class:`DateOffset` ``freq`` inputs (:issue:`24121`) - Bug in :class:`PeriodDtype` constructor raising ``ValueError`` instead of ``TypeError`` when an invalid type is passed (:issue:`51790`) +- Bug in :class:`PeriodDtype` where the object could be kept alive when deleted (:issue:`54184`) - Bug in :func:`read_csv` not processing empty strings as a null value, with ``engine="pyarrow"`` (:issue:`52087`) - Bug in :func:`read_csv` returning ``object`` dtype columns instead of ``float64`` dtype columns with ``engine="pyarrow"`` for columns that are all null with ``engine="pyarrow"`` (:issue:`52087`) - Bug in :meth:`Period.now` not accepting the ``freq`` parameter as a keyword argument (:issue:`53369`) - Bug in :meth:`arrays.PeriodArray.map` and :meth:`PeriodIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) - Bug in incorrectly allowing construction of :class:`Period` or :class:`PeriodDtype` with :class:`CustomBusinessDay` freq; use :class:`BusinessDay` instead (:issue:`52534`) -- Bug in :class:`PeriodDtype` where the object could be kept alive when deleted (:issue:`?`) Plotting ^^^^^^^^ diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index c52252c6fdcae..cdb33db540743 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -567,6 +567,7 @@ def test_perioddtype_caching_dateoffset_normalize(self): assert not per_d2.freq.normalize def test_dont_keep_ref_after_del(self): + # GH 54184 dtype = PeriodDtype("D") ref = weakref.ref(dtype) del dtype @@ -840,6 +841,7 @@ def test_basic_dtype(self): assert not is_interval_dtype(np.float64) def test_caching(self): + # GH 54184: Caching not shown to improve performance IntervalDtype.reset_cache() dtype = IntervalDtype("int64", "right") assert len(IntervalDtype._cache_dtypes) == 0 @@ -864,6 +866,7 @@ def test_unpickling_without_closed(self): tm.round_trip_pickle(dtype) def test_dont_keep_ref_after_del(self): + # GH 54184 dtype = IntervalDtype("int64", "right") ref = weakref.ref(dtype) del dtype From 6cf1cbe9251b8a8ff587492c46fe288246dc994d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jul 2023 16:41:22 -0700 Subject: [PATCH 3/5] Address tests --- pandas/core/dtypes/dtypes.py | 18 +++++++++--------- pandas/tests/dtypes/test_common.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 6254616c40591..2766e2ff2e309 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1197,17 +1197,17 @@ def __init__(self, subtype=None, closed: IntervalClosedType | None = None): try: subtype = pandas_dtype(subtype) - if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): - # GH 19016 - msg = ( - "category, object, and string subtypes are not supported " - "for IntervalDtype" - ) - raise TypeError(msg) - self._subtype = subtype - self._closed = closed except TypeError as err: raise TypeError("could not construct IntervalDtype") from err + if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalDtype" + ) + raise TypeError(msg) + self._subtype = subtype + self._closed = closed @cache_readonly def _can_hold_na(self) -> bool: diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index aefed29a490ca..d0a34cedb7dbe 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -100,7 +100,7 @@ def test_categorical_dtype(self): ], ) def test_period_dtype(self, dtype): - assert com.pandas_dtype(dtype) is PeriodDtype(dtype) + assert com.pandas_dtype(dtype) is not PeriodDtype(dtype) assert com.pandas_dtype(dtype) == PeriodDtype(dtype) assert com.pandas_dtype(dtype) == dtype From 2daa8c6db92cdbb0bce2e7458a8b89aed439dbc7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jul 2023 19:02:05 -0700 Subject: [PATCH 4/5] Typing --- pandas/core/dtypes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2766e2ff2e309..0f978b5b1cb66 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1155,7 +1155,7 @@ class IntervalDtype(PandasExtensionDtype): _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} - def __init__(self, subtype=None, closed: IntervalClosedType | None = None): + def __init__(self, subtype=None, closed: IntervalClosedType | None = None) -> None: from pandas.core.dtypes.common import ( is_string_dtype, pandas_dtype, From 7b652586c4c493333750d47ec1980d708fc50e57 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 19 Jul 2023 09:41:41 -0700 Subject: [PATCH 5/5] typing: --- pandas/core/dtypes/dtypes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0f978b5b1cb66..5d3d5a1a6b344 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1154,6 +1154,8 @@ class IntervalDtype(PandasExtensionDtype): ) _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} + _subtype: None | np.dtype + _closed: IntervalClosedType | None def __init__(self, subtype=None, closed: IntervalClosedType | None = None) -> None: from pandas.core.dtypes.common import ( @@ -1223,7 +1225,7 @@ def _can_hold_na(self) -> bool: @property def closed(self) -> IntervalClosedType: - return self._closed + return self._closed # type: ignore[return-value] @property def subtype(self):