From aa166a4e02403747d4fe2482f2a94765e26b44e8 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 10 Feb 2021 19:12:09 -0800 Subject: [PATCH 1/2] BUG: maybe_promote with dt64tz and mismatched NA --- pandas/core/dtypes/cast.py | 66 ++++++++++++------------ pandas/tests/dtypes/cast/test_promote.py | 11 ++++ 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5d9b51238c255..e24e58e6c2738 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -24,7 +24,7 @@ import numpy as np -from pandas._libs import lib, missing as libmissing, tslib +from pandas._libs import lib, tslib from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, @@ -87,7 +87,12 @@ ABCSeries, ) from pandas.core.dtypes.inference import is_list_like -from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + na_value_for_dtype, + notna, +) if TYPE_CHECKING: from pandas import Series @@ -530,16 +535,26 @@ def maybe_promote(dtype: DtypeObj, fill_value=np.nan): dtype = np.dtype(object) return dtype, fill_value + kinds = ["i", "u", "f", "c", "m", "M"] + if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds: + dtype = ensure_dtype_can_hold_na(dtype) + fv = na_value_for_dtype(dtype) + return dtype, fv + + elif isna(fill_value): + dtype = np.dtype(object) + if fill_value is None: + # but we retain e.g. pd.NA + fill_value = np.nan + return dtype, fill_value + # returns tuple of (dtype, fill_value) if issubclass(dtype.type, np.datetime64): if isinstance(fill_value, datetime) and fill_value.tzinfo is not None: # Trying to insert tzaware into tznaive, have to cast to object dtype = np.dtype(np.object_) - elif is_integer(fill_value) or (is_float(fill_value) and not isna(fill_value)): + elif is_integer(fill_value) or is_float(fill_value): dtype = np.dtype(np.object_) - elif is_valid_na_for_dtype(fill_value, dtype): - # e.g. pd.NA, which is not accepted by Timestamp constructor - fill_value = np.datetime64("NaT", "ns") else: try: fill_value = Timestamp(fill_value).to_datetime64() @@ -548,14 +563,11 @@ def maybe_promote(dtype: DtypeObj, fill_value=np.nan): elif issubclass(dtype.type, np.timedelta64): if ( is_integer(fill_value) - or (is_float(fill_value) and not np.isnan(fill_value)) + or is_float(fill_value) or isinstance(fill_value, str) ): # TODO: What about str that can be a timedelta? dtype = np.dtype(np.object_) - elif is_valid_na_for_dtype(fill_value, dtype): - # e.g pd.NA, which is not accepted by the Timedelta constructor - fill_value = np.timedelta64("NaT", "ns") else: try: fv = Timedelta(fill_value) @@ -568,9 +580,7 @@ def maybe_promote(dtype: DtypeObj, fill_value=np.nan): else: fill_value = fv.to_timedelta64() elif isinstance(dtype, DatetimeTZDtype): - if isna(fill_value): - fill_value = NaT - elif not isinstance(fill_value, datetime): + if not isinstance(fill_value, datetime): dtype = np.dtype(np.object_) elif fill_value.tzinfo is None: dtype = np.dtype(np.object_) @@ -578,9 +588,6 @@ def maybe_promote(dtype: DtypeObj, fill_value=np.nan): # TODO: sure we want to cast here? dtype = np.dtype(np.object_) - elif is_extension_array_dtype(dtype) and isna(fill_value): - fill_value = dtype.na_value - elif is_float(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.dtype(np.object_) @@ -629,30 +636,23 @@ def maybe_promote(dtype: DtypeObj, fill_value=np.nan): # e.g. mst is np.complex128 and dtype is np.complex64 dtype = mst - elif fill_value is None or fill_value is libmissing.NA: - # Note: we already excluded dt64/td64 dtypes above - if is_float_dtype(dtype) or is_complex_dtype(dtype): - fill_value = np.nan - elif is_integer_dtype(dtype): - dtype = np.float64 - fill_value = np.nan - else: - dtype = np.dtype(np.object_) - if fill_value is not libmissing.NA: - fill_value = np.nan else: dtype = np.dtype(np.object_) - # in case we have a string that looked like a number - if is_extension_array_dtype(dtype): - pass - elif issubclass(np.dtype(dtype).type, (bytes, str)): - dtype = np.dtype(np.object_) - + dtype = sanitize_str_dtypes_to_object(dtype) fill_value = _ensure_dtype_type(fill_value, dtype) return dtype, fill_value +def sanitize_str_dtypes_to_object(dtype: DtypeObj) -> DtypeObj: + """ + Convert any numpy str/bytes dtypes to object. + """ + if isinstance(dtype, np.dtype) and dtype.kind in ["S", "U"]: + dtype = np.dtype(object) + return dtype + + def _ensure_dtype_type(value, dtype: DtypeObj): """ Ensure that the given value is an instance of the given dtype. diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 16caf935652cb..4f622639d7d62 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -450,6 +450,17 @@ def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value): _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) +def test_maybe_promote_datetimetz_with_mismatched_na(tz_aware_fixture): + fill_value = np.timedelta64("NaT") + + dtype = DatetimeTZDtype(tz=tz_aware_fixture) + + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + @pytest.mark.parametrize( "fill_value", [ From 7f15f5e0f9507c03c5159823229c34cb149332dc Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Feb 2021 17:50:16 -0800 Subject: [PATCH 2/2] Fixup deleted wrong test --- pandas/tests/dtypes/cast/test_promote.py | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 03d0a891d9a8b..08303fc601b3e 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -20,6 +20,7 @@ is_scalar, is_timedelta64_dtype, ) +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna import pandas as pd @@ -405,6 +406,31 @@ def test_maybe_promote_any_with_datetime64( _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) +@pytest.mark.parametrize( + "fill_value", + [ + pd.Timestamp("now"), + np.datetime64("now"), + datetime.datetime.now(), + datetime.date.today(), + ], + ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], +) +def test_maybe_promote_any_numpy_dtype_with_datetimetz( + any_numpy_dtype_reduced, tz_aware_fixture, fill_value +): + dtype = np.dtype(any_numpy_dtype_reduced) + fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) + + fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] + + # filling any numpy dtype with datetimetz casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype_reduced): dtype = np.dtype(timedelta64_dtype) fill_dtype = np.dtype(any_numpy_dtype_reduced)