From d75762d94296b459d1d80c5f05812001114db76f Mon Sep 17 00:00:00 2001 From: srkds Date: Wed, 3 May 2023 00:25:55 +0530 Subject: [PATCH 1/9] BUG: NaT instead of error for timestamp --- pandas/core/internals/managers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 36dd0cece0f20..723f75d707541 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -26,6 +26,7 @@ BlockPlacement, BlockValuesRefs, ) +from pandas._libs.tslibs import NaT from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level @@ -2358,7 +2359,7 @@ def _preprocess_slice_or_indexer( def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike: if isinstance(dtype, DatetimeTZDtype): # NB: exclude e.g. pyarrow[dt64tz] dtypes - i8values = np.full(shape, fill_value._value) + i8values = np.full(shape, NaT.value) return DatetimeArray(i8values, dtype=dtype) elif is_1d_only_ea_dtype(dtype): From c1c56754595c590204a407f9567b42380ef265dd Mon Sep 17 00:00:00 2001 From: srkds Date: Mon, 8 May 2023 23:37:21 +0530 Subject: [PATCH 2/9] Timestamp --- pandas/core/internals/managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cdaa3ef39821f..acea84a0ff7ef 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -26,7 +26,7 @@ BlockPlacement, BlockValuesRefs, ) -from pandas._libs.tslibs import NaT +from pandas._libs.tslibs import Timestamp from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level @@ -2406,7 +2406,7 @@ def _preprocess_slice_or_indexer( def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike: if isinstance(dtype, DatetimeTZDtype): # 
NB: exclude e.g. pyarrow[dt64tz] dtypes - i8values = np.full(shape, NaT.value) + i8values = np.full(shape, Timestamp(fill_value)._value) return DatetimeArray(i8values, dtype=dtype) elif is_1d_only_ea_dtype(dtype): From 4171a7c0f5aa9f15f283d4c24df3cdf57d6445a5 Mon Sep 17 00:00:00 2001 From: srkds Date: Tue, 9 May 2023 23:40:58 +0530 Subject: [PATCH 3/9] whatsnew entry --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 62d56f684a11d..1a63cdc6050ca 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -309,6 +309,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`) +- Bug in :func:`concat` raises ``AttributeError`` when concate ``None`` dtype DataFrame with ``timestamp`` dtype DataFrame. (:issue:`52093`) - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) - Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) - Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) From 416f4a8b99cc3622aa94bfaebb8781bf9efd3260 Mon Sep 17 00:00:00 2001 From: srkds Date: Mon, 26 Jun 2023 18:58:16 +0530 Subject: [PATCH 4/9] added actual bug test case --- pandas/tests/reshape/concat/test_concat.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index dc14e6e74302e..28f3995d1102e 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -826,3 +826,14 @@ def test_concat_mismatched_keys_length(): concat((x for x in sers), keys=(y for y in keys), axis=1) with 
tm.assert_produces_warning(FutureWarning, match=msg): concat((x for x in sers), keys=(y for y in keys), axis=0) + + +def test_concat_none_with_datetime(): + # GH 52093 + df1 = DataFrame([{"A": None}], dtype="datetime64[ns, UTC]") + df2 = DataFrame([{"A": pd.to_datetime("1990-12-20 00:00:00+00:00")}]) + result = concat([df1, df2]) + expected = DataFrame( + [{"A": None}, {"A": pd.to_datetime("1990-12-20 00:00:00+00:00")}], index=[0, 0] + ) + tm.assert_frame_equal(result, expected) From 71788fd188fa2baf64c1e56a585d56712d0999f9 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sat, 5 Aug 2023 23:12:29 +0800 Subject: [PATCH 5/9] Update test --- pandas/tests/reshape/concat/test_concat.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 4491023125fb2..38a9048ca95e9 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -869,3 +869,14 @@ def test_concat_multiindex_with_category(): ) expected = expected.set_index(["c1", "c2"]) tm.assert_frame_equal(result, expected) + + +def test_concat_none_with_timezone_timestamp(): + # GH#52093 + df1 = DataFrame([{"A": None}]) + df2 = DataFrame([{"A": pd.Timestamp("1990-12-20 00:00:00+00:00")}]) + msg = "The behavior of DataFrame concatenation with empty or all-NA entries" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = concat([df1, df2], ignore_index=True) + expected = DataFrame({"A": [None, pd.Timestamp("1990-12-20 00:00:00+00:00")]}) + tm.assert_frame_equal(result, expected) From b902ade43b492ff3785ccf5e2e2dee729ce4a35b Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 6 Aug 2023 15:02:18 +0800 Subject: [PATCH 6/9] Update whatsnew --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e2a21e77b4054..ca66fc1d147d2 100644 --- 
a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -628,7 +628,7 @@ Datetimelike - :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`) - :meth:`DatetimeIndex.slice_indexer` now raises ``KeyError`` for non-monotonic indexes if either of the slice bounds is not in the index, this behaviour was previously deprecated but inconsistently handled. (:issue:`53983`) - Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a :class:`DateOffset` object by a constant (:issue:`47953`) -- Bug in :func:`concat` raises ``AttributeError`` when concate ``None`` dtype DataFrame with ``timestamp`` dtype DataFrame. (:issue:`52093`) +- Bug in :func:`concat` raises ``AttributeError`` when concate all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`) - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) - Bug in :func:`to_datetime` converting :class:`Series` or :class:`DataFrame` containing :class:`arrays.ArrowExtensionArray` of ``pyarrow`` timestamps to numpy datetimes (:issue:`52545`) - Bug in :meth:`DataFrame.to_sql` raising ``ValueError`` for pyarrow-backed date like dtypes (:issue:`53854`) From f3ddec2bf8e8284284dacbf4303971f3378ee0e7 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Thu, 10 Aug 2023 11:51:41 +0800 Subject: [PATCH 7/9] Add unit --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/internals/managers.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index ca66fc1d147d2..bd0a87b7f099c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -628,7 +628,7 @@ Datetimelike - :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. 
(:issue:`51644`) - :meth:`DatetimeIndex.slice_indexer` now raises ``KeyError`` for non-monotonic indexes if either of the slice bounds is not in the index, this behaviour was previously deprecated but inconsistently handled. (:issue:`53983`) - Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a :class:`DateOffset` object by a constant (:issue:`47953`) -- Bug in :func:`concat` raises ``AttributeError`` when concate all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`) +- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`) - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) - Bug in :func:`to_datetime` converting :class:`Series` or :class:`DataFrame` containing :class:`arrays.ArrowExtensionArray` of ``pyarrow`` timestamps to numpy datetimes (:issue:`52545`) - Bug in :meth:`DataFrame.to_sql` raising ``ValueError`` for pyarrow-backed date like dtypes (:issue:`53854`) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 4c437d768a019..7eab3c076c3c7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2275,7 +2275,8 @@ def _preprocess_slice_or_indexer( def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike: if isinstance(dtype, DatetimeTZDtype): # NB: exclude e.g. 
pyarrow[dt64tz] dtypes - i8values = np.full(shape, Timestamp(fill_value)._value) + ts = Timestamp(fill_value, unit=dtype.unit) + i8values = np.full(shape, ts._value) return DatetimeArray(i8values, dtype=dtype) elif is_1d_only_ea_dtype(dtype): From dc073e98ad98f686f29e2655fdaa4803e701e7b0 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Mon, 21 Aug 2023 18:31:35 +0800 Subject: [PATCH 8/9] Update --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 119ffabe40465..d602fc7262f19 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2301,7 +2301,7 @@ def _preprocess_slice_or_indexer( def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike: if isinstance(dtype, DatetimeTZDtype): # NB: exclude e.g. pyarrow[dt64tz] dtypes - ts = Timestamp(fill_value, unit=dtype.unit) + ts = Timestamp(fill_value).as_unit(dtype.unit) i8values = np.full(shape, ts._value) return DatetimeArray(i8values, dtype=dtype) From e5ae627636865a64c082255ebc6490c8c18c0446 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Tue, 24 Oct 2023 00:38:49 +0800 Subject: [PATCH 9/9] Move whatsnew --- doc/source/whatsnew/v2.1.0.rst | 1 - doc/source/whatsnew/v2.2.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c3b9a70baa3bf..51b4c4f297b07 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -664,7 +664,6 @@ Datetimelike - :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected (:issue:`51644`) - :meth:`DatetimeIndex.slice_indexer` now raises ``KeyError`` for non-monotonic indexes if either of the slice bounds is not in the index; this behaviour was previously deprecated but inconsistently handled (:issue:`53983`) - Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a 
:class:`DateOffset` object by a constant (:issue:`47953`) -- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`) - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) - Bug in :func:`to_datetime` converting :class:`Series` or :class:`DataFrame` containing :class:`arrays.ArrowExtensionArray` of PyArrow timestamps to numpy datetimes (:issue:`52545`) - Bug in :meth:`.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9eb5bbc8f07d5..e3d17977ad26c 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -301,6 +301,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ +- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame (:issue:`52093`) - Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`) - Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`) - Bug in adding or subtracting a :class:`Week` offset to a ``datetime64`` :class:`Series`, :class:`Index`, or :class:`DataFrame` column with non-nanosecond resolution returning incorrect results (:issue:`55583`)