
BUG: PeriodDtype and IntervalDtype holding references to itself when object is deleted #54184

Merged (8 commits) on Jul 19, 2023
Changes from 6 commits
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.1.0.rst
@@ -494,7 +494,7 @@ Strings
Interval
^^^^^^^^
- :meth:`pd.IntervalIndex.get_indexer` and :meth:`pd.IntervalIndex.get_indexer_nonunique` raising if ``target`` is read-only array (:issue:`53703`)
-
- Bug in :class:`IntervalDtype` where the object could be kept alive when deleted (:issue:`54184`)

Indexing
^^^^^^^^
@@ -538,6 +538,7 @@ Period
- Bug in :class:`PeriodDtype` constructor failing to raise ``TypeError`` when no argument is passed or when ``None`` is passed (:issue:`27388`)
- Bug in :class:`PeriodDtype` constructor incorrectly returning the same ``normalize`` for different :class:`DateOffset` ``freq`` inputs (:issue:`24121`)
- Bug in :class:`PeriodDtype` constructor raising ``ValueError`` instead of ``TypeError`` when an invalid type is passed (:issue:`51790`)
- Bug in :class:`PeriodDtype` where the object could be kept alive when deleted (:issue:`54184`)
- Bug in :func:`read_csv` not processing empty strings as a null value, with ``engine="pyarrow"`` (:issue:`52087`)
- Bug in :func:`read_csv` returning ``object`` dtype columns instead of ``float64`` dtype columns with ``engine="pyarrow"`` for columns that are all null with ``engine="pyarrow"`` (:issue:`52087`)
- Bug in :meth:`Period.now` not accepting the ``freq`` parameter as a keyword argument (:issue:`53369`)
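The symptom is easy to verify with weakref: before this change, the class-level dtype caches kept every constructed PeriodDtype/IntervalDtype instance alive even after the last user reference was dropped. A minimal check, mirroring the new test_dont_keep_ref_after_del tests added below:

```python
import weakref

import pandas as pd

dtype = pd.PeriodDtype("D")
ref = weakref.ref(dtype)
del dtype
# After this fix nothing else holds the instance, so the weak reference is dead.
# Before the fix, PeriodDtype._cache_dtypes kept the object alive and ref()
# still returned it.
print(ref() is None)  # True with this change
```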
55 changes: 23 additions & 32 deletions pandas/core/dtypes/dtypes.py
@@ -950,7 +950,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype):
# error: Incompatible types in assignment (expression has type
# "Dict[int, PandasExtensionDtype]", base class "PandasExtensionDtype"
# defined the type as "Dict[str, PandasExtensionDtype]") [assignment]
_cache_dtypes: dict[BaseOffset, PeriodDtype] = {} # type: ignore[assignment] # noqa: E501
_cache_dtypes: dict[BaseOffset, int] = {} # type: ignore[assignment]
__hash__ = PeriodDtypeBase.__hash__
_freq: BaseOffset

@@ -967,13 +967,13 @@ def __new__(cls, freq):
freq = cls._parse_dtype_strict(freq)

try:
return cls._cache_dtypes[freq]
dtype_code = cls._cache_dtypes[freq]
except KeyError:
dtype_code = freq._period_dtype_code
u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n)
u._freq = freq
cls._cache_dtypes[freq] = u
return u
cls._cache_dtypes[freq] = dtype_code
u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n)
u._freq = freq
return u

def __reduce__(self):
return type(self), (self.name,)
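The PeriodDtype fix keeps the cache but changes what it stores: _cache_dtypes now maps a BaseOffset to the integer _period_dtype_code rather than to the constructed instance, so the class attribute no longer roots every dtype ever created. A simplified, self-contained sketch of the idea (hypothetical SimpleDtype class, not the actual pandas implementation):

```python
from __future__ import annotations


class SimpleDtype:
    """Sketch: cache a cheap scalar per key instead of the instance itself."""

    # Maps a frequency string to an integer code. It never holds SimpleDtype
    # objects, so instances are garbage-collected as soon as callers drop them.
    _cache_codes: dict[str, int] = {}

    def __new__(cls, freq: str) -> SimpleDtype:
        try:
            code = cls._cache_codes[freq]
        except KeyError:
            code = cls._compute_code(freq)  # stand-in for an expensive lookup
            cls._cache_codes[freq] = code
        obj = object.__new__(cls)
        obj._freq = freq
        obj._code = code
        return obj

    @staticmethod
    def _compute_code(freq: str) -> int:
        # Stand-in for freq._period_dtype_code in the real code.
        return hash(freq) & 0xFFFF
```

One consequence is that identity between two PeriodDtype("D") calls is no longer guaranteed, which is why the tests below switch is to is not while keeping the == and hash() assertions.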
@@ -1155,7 +1155,7 @@ class IntervalDtype(PandasExtensionDtype):

_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}

def __new__(cls, subtype=None, closed: IntervalClosedType | None = None):
def __init__(self, subtype=None, closed: IntervalClosedType | None = None) -> None:
Review comment (Member):

should the PeriodDtype __new__ be changed too?

Reply from @mroeschke (Member, Author), Jul 19, 2023:

I think PeriodDtype has a PeriodDtypeBase.__cinit__ that is supposed to be "private", so I think it should continue to have its own __new__

from pandas.core.dtypes.common import (
is_string_dtype,
pandas_dtype,
@@ -1170,19 +1170,19 @@ def __new__(cls, subtype=None, closed: IntervalClosedType | None = None):
"dtype.closed and 'closed' do not match. "
"Try IntervalDtype(dtype.subtype, closed) instead."
)
return subtype
self._subtype = subtype._subtype
self._closed = subtype._closed
elif subtype is None:
# we are called as an empty constructor
# generally for pickle compat
u = object.__new__(cls)
u._subtype = None
u._closed = closed
return u
self._subtype = None
self._closed = closed
elif isinstance(subtype, str) and subtype.lower() == "interval":
subtype = None
self._subtype = None
self._closed = closed
else:
if isinstance(subtype, str):
m = cls._match.search(subtype)
m = IntervalDtype._match.search(subtype)
if m is not None:
gd = m.groupdict()
subtype = gd["subtype"]
@@ -1199,24 +1199,15 @@ def __new__(cls, subtype=None, closed: IntervalClosedType | None = None):
subtype = pandas_dtype(subtype)
except TypeError as err:
raise TypeError("could not construct IntervalDtype") from err

if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype):
# GH 19016
msg = (
"category, object, and string subtypes are not supported "
"for IntervalDtype"
)
raise TypeError(msg)

key = f"{subtype}{closed}"
try:
return cls._cache_dtypes[key]
except KeyError:
u = object.__new__(cls)
u._subtype = subtype
u._closed = closed
cls._cache_dtypes[key] = u
return u
if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype):
# GH 19016
msg = (
"category, object, and string subtypes are not supported "
"for IntervalDtype"
)
raise TypeError(msg)
self._subtype = subtype
self._closed = closed

@cache_readonly
def _can_hold_na(self) -> bool:
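For IntervalDtype the change is more direct: construction moves from a caching __new__ to a plain __init__, so the parametrized cache is no longer populated at all (the _cache_dtypes class attribute remains, but the updated test_caching below shows it stays empty). Equality and hashing are still value-based, so only identity checks change for user code, as in this example mirroring the updated test_hash_vs_equality:

```python
import pandas as pd

a = pd.IntervalDtype("int64", "right")
b = pd.IntervalDtype("int64", "right")

assert a == b              # equality is still value-based
assert hash(a) == hash(b)  # and so is hashing
assert a is not b          # but instances are no longer cached and shared
```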
2 changes: 1 addition & 1 deletion pandas/tests/dtypes/test_common.py
@@ -100,7 +100,7 @@ def test_categorical_dtype(self):
],
)
def test_period_dtype(self, dtype):
assert com.pandas_dtype(dtype) is PeriodDtype(dtype)
assert com.pandas_dtype(dtype) is not PeriodDtype(dtype)
assert com.pandas_dtype(dtype) == PeriodDtype(dtype)
assert com.pandas_dtype(dtype) == dtype

42 changes: 29 additions & 13 deletions pandas/tests/dtypes/test_dtypes.py
@@ -1,4 +1,5 @@
import re
import weakref

import numpy as np
import pytest
@@ -412,9 +413,9 @@ def test_hash_vs_equality(self, dtype):
assert dtype == dtype2
assert dtype2 == dtype
assert dtype3 == dtype
assert dtype is dtype2
assert dtype2 is dtype
assert dtype3 is dtype
assert dtype is not dtype2
assert dtype2 is not dtype
assert dtype3 is not dtype
assert hash(dtype) == hash(dtype2)
assert hash(dtype) == hash(dtype3)

@@ -458,13 +459,13 @@ def test_subclass(self):

def test_identity(self):
assert PeriodDtype("period[D]") == PeriodDtype("period[D]")
assert PeriodDtype("period[D]") is PeriodDtype("period[D]")
assert PeriodDtype("period[D]") is not PeriodDtype("period[D]")

assert PeriodDtype("period[3D]") == PeriodDtype("period[3D]")
assert PeriodDtype("period[3D]") is PeriodDtype("period[3D]")
assert PeriodDtype("period[3D]") is not PeriodDtype("period[3D]")

assert PeriodDtype("period[1S1U]") == PeriodDtype("period[1000001U]")
assert PeriodDtype("period[1S1U]") is PeriodDtype("period[1000001U]")
assert PeriodDtype("period[1S1U]") is not PeriodDtype("period[1000001U]")

def test_compat(self, dtype):
assert not is_datetime64_ns_dtype(dtype)
@@ -565,6 +566,13 @@ def test_perioddtype_caching_dateoffset_normalize(self):
per_d2 = PeriodDtype(pd.offsets.YearEnd(normalize=False))
assert not per_d2.freq.normalize

def test_dont_keep_ref_after_del(self):
# GH 54184
dtype = PeriodDtype("D")
ref = weakref.ref(dtype)
del dtype
assert ref() is None


class TestIntervalDtype(Base):
@pytest.fixture
@@ -581,9 +589,9 @@ def test_hash_vs_equality(self, dtype):
assert dtype == dtype2
assert dtype2 == dtype
assert dtype3 == dtype
assert dtype is dtype2
assert dtype2 is dtype3
assert dtype3 is dtype
assert dtype is not dtype2
assert dtype2 is not dtype3
assert dtype3 is not dtype
assert hash(dtype) == hash(dtype2)
assert hash(dtype) == hash(dtype3)

@@ -593,9 +601,9 @@ def test_hash_vs_equality(self, dtype):
assert dtype2 == dtype1
assert dtype2 == dtype2
assert dtype2 == dtype3
assert dtype2 is dtype1
assert dtype2 is not dtype1
assert dtype2 is dtype2
assert dtype2 is dtype3
assert dtype2 is not dtype3
assert hash(dtype2) == hash(dtype1)
assert hash(dtype2) == hash(dtype2)
assert hash(dtype2) == hash(dtype3)
@@ -833,12 +841,13 @@ def test_basic_dtype(self):
assert not is_interval_dtype(np.float64)

def test_caching(self):
# GH 54184: Caching not shown to improve performance
IntervalDtype.reset_cache()
dtype = IntervalDtype("int64", "right")
assert len(IntervalDtype._cache_dtypes) == 1
assert len(IntervalDtype._cache_dtypes) == 0

IntervalDtype("interval")
assert len(IntervalDtype._cache_dtypes) == 2
assert len(IntervalDtype._cache_dtypes) == 0

IntervalDtype.reset_cache()
tm.round_trip_pickle(dtype)
@@ -856,6 +865,13 @@ def test_unpickling_without_closed(self):

tm.round_trip_pickle(dtype)

def test_dont_keep_ref_after_del(self):
# GH 54184
dtype = IntervalDtype("int64", "right")
ref = weakref.ref(dtype)
del dtype
assert ref() is None


class TestCategoricalDtypeParametrized:
@pytest.mark.parametrize(