From bd3c0f8ecf21b4cb7d69874bf57f06314ce3a13a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 28 Apr 2023 16:02:02 -0700 Subject: [PATCH 1/4] BUG: PeriodDtype incorrectly caching different DateOffset freqs --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/dtypes/dtypes.py | 4 ++-- pandas/tests/arrays/test_period.py | 9 +++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 644a7e86ec429..fac422b7eb4fe 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -374,12 +374,12 @@ Period ^^^^^^ - :meth:`PeriodIndex.map` with ``na_action="ignore"`` now works as expected (:issue:`51644`) - Bug in :class:`PeriodDtype` constructor failing to raise ``TypeError`` when no argument is passed or when ``None`` is passed (:issue:`27388`) +- Bug in :class:`PeriodDtype` constructor incorrectly returning the same ``normalize`` for different :class:`DateOffset` ``freq`` inputs (:issue:`24121`) - Bug in :class:`PeriodDtype` constructor raising ``ValueError`` instead of ``TypeError`` when an invalid type is passed (:issue:`51790`) - Bug in :func:`read_csv` not processing empty strings as a null value, with ``engine="pyarrow"`` (:issue:`52087`) - Bug in :func:`read_csv` returning ``object`` dtype columns instead of ``float64`` dtype columns with ``engine="pyarrow"`` for columns that are all null with ``engine="pyarrow"`` (:issue:`52087`) - Bug in :meth:`arrays.PeriodArray.map` and :meth:`PeriodIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) - Bug in incorrectly allowing construction of :class:`Period` or :class:`PeriodDtype` with :class:`CustomBusinessDay` freq; use :class:`BusinessDay` instead (:issue:`52534`) -- Plotting ^^^^^^^^ diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 014d7a4c5a330..4c79b391f1f64 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -916,12 +916,12 @@ def __new__(cls, freq): freq = cls._parse_dtype_strict(freq) try: - return cls._cache_dtypes[freq.freqstr] + return cls._cache_dtypes[hash(freq)] except KeyError: dtype_code = freq._period_dtype_code u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n) u._freq = freq - cls._cache_dtypes[freq.freqstr] = u + cls._cache_dtypes[hash(freq)] = u return u def __reduce__(self): diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index d1e954bc2ebe2..aa0a13670a525 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -22,6 +22,15 @@ def test_registered(): assert result == expected +def test_perioddtype_caching_dateoffset_normalize(): + # GH 24121 + per_d = PeriodDtype(pd.offsets.YearEnd(normalize=True)) + assert per_d.freq.normalize + + per_d2 = PeriodDtype(pd.offsets.YearEnd(normalize=False)) + assert not per_d2.freq.normalize + + # ---------------------------------------------------------------------------- # period_array From 2578767e725c1699dfbc9fd81c2f573631ac99f0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 28 Apr 2023 16:54:53 -0700 Subject: [PATCH 2/4] Change typing --- pandas/core/dtypes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 4c79b391f1f64..d511bf01c2d91 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -899,7 +899,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype): num = 102 _metadata = ("freq",) _match = re.compile(r"(P|p)eriod\[(?P.+)\]") - _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} + _cache_dtypes: dict[int, PandasExtensionDtype] = {} __hash__ = PeriodDtypeBase.__hash__ _freq: BaseOffset From 99781c649b6884520ed95a5fa2494879d9ca63f0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 1 May 2023 11:31:05 -0700 Subject: [PATCH 3/4] Address review --- pandas/core/dtypes/dtypes.py | 6 +++--- pandas/tests/arrays/test_period.py | 9 --------- pandas/tests/dtypes/test_dtypes.py | 8 ++++++++ 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0fbfab9474ca0..edc41041c8522 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -899,7 +899,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype): num = 102 _metadata = ("freq",) _match = re.compile(r"(P|p)eriod\[(?P.+)\]") - _cache_dtypes: dict[int, PandasExtensionDtype] = {} + _cache_dtypes: dict[BaseOffset, PandasExtensionDtype] = {} # type: ignore[assignment] # noqa:E501 __hash__ = PeriodDtypeBase.__hash__ _freq: BaseOffset @@ -916,12 +916,12 @@ def __new__(cls, freq): freq = cls._parse_dtype_strict(freq) try: - return cls._cache_dtypes[hash(freq)] + return cls._cache_dtypes[freq] except KeyError: dtype_code = freq._period_dtype_code u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n) u._freq = freq - cls._cache_dtypes[hash(freq)] = u + cls._cache_dtypes[freq] = u return u def __reduce__(self): diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index aa0a13670a525..d1e954bc2ebe2 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -22,15 +22,6 @@ def test_registered(): assert result == expected -def test_perioddtype_caching_dateoffset_normalize(): - # GH 24121 - per_d = PeriodDtype(pd.offsets.YearEnd(normalize=True)) - assert per_d.freq.normalize - - per_d2 = PeriodDtype(pd.offsets.YearEnd(normalize=False)) - assert not per_d2.freq.normalize - - # ---------------------------------------------------------------------------- # period_array diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 745a933f3a780..1cc10d14b904f 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -559,6 +559,14 @@ def test_not_string(self): # though PeriodDtype has object kind, it cannot be string assert not is_string_dtype(PeriodDtype("D")) + def test_perioddtype_caching_dateoffset_normalize(self): + # GH 24121 + per_d = PeriodDtype(pd.offsets.YearEnd(normalize=True)) + assert per_d.freq.normalize + + per_d2 = PeriodDtype(pd.offsets.YearEnd(normalize=False)) + assert not per_d2.freq.normalize + class TestIntervalDtype(Base): @pytest.fixture From bc0d571297937737b1b66ab53cc579f4afbed7ac Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 1 May 2023 11:55:36 -0700 Subject: [PATCH 4/4] Add more specific typing and error message --- pandas/core/dtypes/dtypes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index edc41041c8522..a03d5978adfbf 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -899,7 +899,10 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype): num = 102 _metadata = ("freq",) _match = re.compile(r"(P|p)eriod\[(?P.+)\]") - _cache_dtypes: dict[BaseOffset, PandasExtensionDtype] = {} # type: ignore[assignment] # noqa:E501 + # error: Incompatible types in assignment (expression has type + # "Dict[int, PandasExtensionDtype]", base class "PandasExtensionDtype" + # defined the type as "Dict[str, PandasExtensionDtype]") [assignment] + _cache_dtypes: dict[BaseOffset, PeriodDtype] = {} # type: ignore[assignment] # noqa:E501 __hash__ = PeriodDtypeBase.__hash__ _freq: BaseOffset