From e951b340046360ac7812d890331f77887a341862 Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 2 Mar 2021 20:59:50 -0800
Subject: [PATCH] REF: move dtype-validation out of maybe_cast_to_datetime

---
 pandas/core/dtypes/cast.py | 95 +++++++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 42 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index f5ec6616659ba..92b8ce60dec60 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -38,7 +38,6 @@
     Timedelta,
     Timestamp,
     conversion,
-    iNaT,
     ints_to_pydatetime,
 )
 from pandas._libs.tslibs.timedeltas import array_to_timedelta64
@@ -1613,50 +1612,16 @@ def maybe_cast_to_datetime(
     is_datetime64tz = is_datetime64tz_dtype(dtype)
     is_timedelta64 = is_timedelta64_dtype(dtype)
 
-    if is_datetime64 or is_datetime64tz or is_timedelta64:
-
-        # Force the dtype if needed.
-        msg = (
-            f"The '{dtype.name}' dtype has no unit. "
-            f"Please pass in '{dtype.name}[ns]' instead."
-        )
+    vdtype = getattr(value, "dtype", None)
 
-        if is_datetime64:
-            # unpack e.g. SparseDtype
-            dtype = getattr(dtype, "subtype", dtype)
-            if not is_dtype_equal(dtype, DT64NS_DTYPE):
-
-                # pandas supports dtype whose granularity is less than [ns]
-                # e.g., [ps], [fs], [as]
-                if dtype <= np.dtype("M8[ns]"):
-                    if dtype.name == "datetime64":
-                        raise ValueError(msg)
-                    dtype = DT64NS_DTYPE
-                else:
-                    raise TypeError(
-                        f"cannot convert datetimelike to dtype [{dtype}]"
-                    )
-
-        elif is_timedelta64 and not is_dtype_equal(dtype, TD64NS_DTYPE):
-
-            # pandas supports dtype whose granularity is less than [ns]
-            # e.g., [ps], [fs], [as]
-            if dtype <= np.dtype("m8[ns]"):
-                if dtype.name == "timedelta64":
-                    raise ValueError(msg)
-                dtype = TD64NS_DTYPE
-            else:
-                raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
+    if is_datetime64 or is_datetime64tz or is_timedelta64:
+        dtype = ensure_nanosecond_dtype(dtype)
 
         if not is_sparse(value):
             value = np.array(value, copy=False)
 
-            # have a scalar array-like (e.g. NaT)
-            if value.ndim == 0:
-                value = iNaT
-
             # we have an array of datetime or timedeltas & nulls
-            elif value.size or not is_dtype_equal(value.dtype, dtype):
+            if value.size or not is_dtype_equal(value.dtype, dtype):
                 _disallow_mismatched_datetimelike(value, dtype)
 
                 try:
@@ -1665,6 +1630,8 @@
                         # GH 25843: Remove tz information since the dtype
                         # didn't specify one
                         if dta.tz is not None:
+                            # equiv: dta.view(dtype)
+                            # Note: NOT equivalent to dta.astype(dtype)
                             dta = dta.tz_localize(None)
                         value = dta
                     elif is_datetime64tz:
@@ -1678,10 +1645,12 @@
                             value = dta.astype(dtype, copy=False)
                         elif is_dt_string:
                             # Strings here are naive, so directly localize
+                            # equiv: dta.astype(dtype)  # though deprecated
                             value = dta.tz_localize(dtype.tz)
                         else:
                             # Numeric values are UTC at this point,
                             # so localize and convert
+                            # equiv: Series(dta).astype(dtype)  # though deprecated
                             value = dta.tz_localize("UTC").tz_convert(dtype.tz)
                     elif is_timedelta64:
                         # if successful, we get a ndarray[td64ns]
@@ -1694,9 +1663,7 @@
                     pass
 
     # coerce datetimelike to object
-    elif is_datetime64_dtype(
-        getattr(value, "dtype", None)
-    ) and not is_datetime64_dtype(dtype):
+    elif is_datetime64_dtype(vdtype) and not is_datetime64_dtype(dtype):
         if is_object_dtype(dtype):
             value = cast(np.ndarray, value)
 
@@ -1740,6 +1707,50 @@ def sanitize_to_nanoseconds(values: np.ndarray) -> np.ndarray:
     return values
 
 
+def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
+    """
+    Convert dtypes with granularity less than nanosecond to nanosecond
+
+    >>> ensure_nanosecond_dtype(np.dtype("M8[s]"))
+    dtype('<M8[ns]')
+
+    >>> ensure_nanosecond_dtype(np.dtype("m8[ps]"))
+    TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]]
+    """
+    msg = (
+        f"The '{dtype.name}' dtype has no unit. "
+        f"Please pass in '{dtype.name}[ns]' instead."
+    )
+
+    # unpack e.g. SparseDtype
+    dtype = getattr(dtype, "subtype", dtype)
+
+    if not isinstance(dtype, np.dtype):
+        # i.e. datetime64tz
+        pass
+
+    elif dtype.kind == "M" and dtype != DT64NS_DTYPE:
+        # pandas supports dtype whose granularity is less than [ns]
+        # e.g., [ps], [fs], [as]
+        if dtype <= np.dtype("M8[ns]"):
+            if dtype.name == "datetime64":
+                raise ValueError(msg)
+            dtype = DT64NS_DTYPE
+        else:
+            raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]")
+
+    elif dtype.kind == "m" and dtype != TD64NS_DTYPE:
+        # pandas supports dtype whose granularity is less than [ns]
+        # e.g., [ps], [fs], [as]
+        if dtype <= np.dtype("m8[ns]"):
+            if dtype.name == "timedelta64":
+                raise ValueError(msg)
+            dtype = TD64NS_DTYPE
+        else:
+            raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
+    return dtype
+
+
 def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     """
     Find a common data type among the given dtypes.
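
Usage sketch (not part of the patch): the snippet below exercises the helper this patch extracts, assuming the name ensure_nanosecond_dtype and its location in pandas.core.dtypes.cast exactly as added here; both are private internals and may be renamed or moved in other pandas versions.

    import numpy as np

    from pandas import DatetimeTZDtype
    from pandas.core.dtypes.cast import ensure_nanosecond_dtype

    # Coarser-than-nanosecond units are normalized to the nanosecond dtype.
    assert ensure_nanosecond_dtype(np.dtype("M8[s]")) == np.dtype("M8[ns]")
    assert ensure_nanosecond_dtype(np.dtype("m8[us]")) == np.dtype("m8[ns]")

    # Non-numpy datetimelike dtypes (i.e. datetime64tz) pass through unchanged.
    tz_dtype = DatetimeTZDtype(tz="UTC")
    assert ensure_nanosecond_dtype(tz_dtype) == tz_dtype

    # A unit-less dtype name raises, pointing the caller at the [ns] spelling.
    try:
        ensure_nanosecond_dtype(np.dtype("datetime64"))
    except ValueError as err:
        print(err)  # The 'datetime64' dtype has no unit. Please pass in 'datetime64[ns]' instead.

    # Units finer than nanosecond (ps, fs, as) cannot be represented and raise.
    try:
        ensure_nanosecond_dtype(np.dtype("m8[ps]"))
    except TypeError as err:
        print(err)  # cannot convert timedeltalike to dtype [timedelta64[ps]]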