From c6b5ff7cc2781da46a76f7d796c3e888a2200d37 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 14 Aug 2021 13:51:08 -0700 Subject: [PATCH 1/2] BUG: JoinUnit._is_valid_na_for --- pandas/core/internals/concat.py | 33 ++++++++++++++++------ pandas/tests/reshape/concat/test_append.py | 6 ++++ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 1802a4d58a34a..e2f8d0a8e72fe 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -10,7 +10,11 @@ import numpy as np -from pandas._libs import internals as libinternals +from pandas._libs import ( + NaT, + internals as libinternals, +) +from pandas._libs.missing import NA from pandas._typing import ( ArrayLike, DtypeObj, @@ -28,7 +32,7 @@ is_1d_only_ea_obj, is_datetime64tz_dtype, is_dtype_equal, - is_extension_array_dtype, + needs_i8_conversion, ) from pandas.core.dtypes.concat import ( cast_to_common_type, @@ -374,13 +378,20 @@ def _is_valid_na_for(self, dtype: DtypeObj) -> bool: values = self.block.values return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K")) - if self.dtype.kind == dtype.kind == "M" and not is_dtype_equal( - self.dtype, dtype - ): + na_value = self.block.fill_value + if na_value is NaT and not is_dtype_equal(self.dtype, dtype): + # e.g. we are dt64 and other is td64 # fill_values match but we should not cast self.block.values to dtype + # TODO: this will need updating if we ever have non-nano dt64/td64 return False - na_value = self.block.fill_value + if na_value is NA and needs_i8_conversion(dtype): + # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat + # e.g. self.dtype == "Int64" and dtype is td64, we dont want + # to consider these as matching + return False + + # TODO: better to use can_hold_element? return is_valid_na_for_dtype(na_value, dtype) @cache_readonly @@ -426,9 +437,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: i8values = np.full(self.shape, fill_value.value) return DatetimeArray(i8values, dtype=empty_dtype) - elif is_extension_array_dtype(blk_dtype): - pass - elif is_1d_only_ea_dtype(empty_dtype): empty_dtype = cast(ExtensionDtype, empty_dtype) cls = empty_dtype.construct_array_type() @@ -440,6 +448,13 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: return missing_arr.take( empty_arr, allow_fill=True, fill_value=fill_value ) + elif isinstance(empty_dtype, ExtensionDtype): + # TODO: no tests get here, a handful would if we disabled + # the dt64tz special-case above (which is faster) + cls = empty_dtype.construct_array_type() + missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype) + missing_arr[:] = fill_value + return missing_arr else: # NB: we should never get here with empty_dtype integer or bool; # if we did, the missing_arr.fill would cast to gibberish diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index ea766089f880d..d47fcb8ac1ca3 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -348,6 +348,12 @@ def test_append_empty_tz_frame_with_datetime64ns(self): expected = DataFrame({"a": [pd.NaT]}).astype(object) tm.assert_frame_equal(result, expected) + # mismatched tz + other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]") + result = df.append(other, ignore_index=True) + expected = DataFrame({"a": [pd.NaT]}).astype(object) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"] ) From 9b9900d8b3cc94feda5113a080f86543bb6accee Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 14 Aug 2021 17:00:39 -0700 Subject: [PATCH 2/2] mypy fixup --- pandas/core/internals/concat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index e2f8d0a8e72fe..0aed9e697ca66 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -458,8 +458,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: else: # NB: we should never get here with empty_dtype integer or bool; # if we did, the missing_arr.fill would cast to gibberish - empty_dtype = cast(np.dtype, empty_dtype) - missing_arr = np.empty(self.shape, dtype=empty_dtype) missing_arr.fill(fill_value) return missing_arr