Skip to content

REF: move dtype-validation out of maybe_cast_to_datetime #40190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 3, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 53 additions & 42 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
Timedelta,
Timestamp,
conversion,
iNaT,
ints_to_pydatetime,
)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
Expand Down Expand Up @@ -1613,50 +1612,16 @@ def maybe_cast_to_datetime(
is_datetime64tz = is_datetime64tz_dtype(dtype)
is_timedelta64 = is_timedelta64_dtype(dtype)

if is_datetime64 or is_datetime64tz or is_timedelta64:

# Force the dtype if needed.
msg = (
f"The '{dtype.name}' dtype has no unit. "
f"Please pass in '{dtype.name}[ns]' instead."
)
vdtype = getattr(value, "dtype", None)

if is_datetime64:
# unpack e.g. SparseDtype
dtype = getattr(dtype, "subtype", dtype)
if not is_dtype_equal(dtype, DT64NS_DTYPE):

# pandas supports dtype whose granularity is less than [ns]
# e.g., [ps], [fs], [as]
if dtype <= np.dtype("M8[ns]"):
if dtype.name == "datetime64":
raise ValueError(msg)
dtype = DT64NS_DTYPE
else:
raise TypeError(
f"cannot convert datetimelike to dtype [{dtype}]"
)

elif is_timedelta64 and not is_dtype_equal(dtype, TD64NS_DTYPE):

# pandas supports dtype whose granularity is less than [ns]
# e.g., [ps], [fs], [as]
if dtype <= np.dtype("m8[ns]"):
if dtype.name == "timedelta64":
raise ValueError(msg)
dtype = TD64NS_DTYPE
else:
raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
if is_datetime64 or is_datetime64tz or is_timedelta64:
dtype = ensure_nanosecond_dtype(dtype)

if not is_sparse(value):
value = np.array(value, copy=False)

# have a scalar array-like (e.g. NaT)
if value.ndim == 0:
value = iNaT

# we have an array of datetime or timedeltas & nulls
elif value.size or not is_dtype_equal(value.dtype, dtype):
if value.size or not is_dtype_equal(value.dtype, dtype):
_disallow_mismatched_datetimelike(value, dtype)

try:
Expand All @@ -1665,6 +1630,8 @@ def maybe_cast_to_datetime(
# GH 25843: Remove tz information since the dtype
# didn't specify one
if dta.tz is not None:
# equiv: dta.view(dtype)
# Note: NOT equivalent to dta.astype(dtype)
dta = dta.tz_localize(None)
value = dta
elif is_datetime64tz:
Expand All @@ -1678,10 +1645,12 @@ def maybe_cast_to_datetime(
value = dta.astype(dtype, copy=False)
elif is_dt_string:
# Strings here are naive, so directly localize
# equiv: dta.astype(dtype) # though deprecated
value = dta.tz_localize(dtype.tz)
else:
# Numeric values are UTC at this point,
# so localize and convert
# equiv: Series(dta).astype(dtype) # though deprecated
value = dta.tz_localize("UTC").tz_convert(dtype.tz)
elif is_timedelta64:
# if successful, we get a ndarray[td64ns]
Expand All @@ -1694,9 +1663,7 @@ def maybe_cast_to_datetime(
pass

# coerce datetimelike to object
elif is_datetime64_dtype(
getattr(value, "dtype", None)
) and not is_datetime64_dtype(dtype):
elif is_datetime64_dtype(vdtype) and not is_datetime64_dtype(dtype):
if is_object_dtype(dtype):
value = cast(np.ndarray, value)

Expand Down Expand Up @@ -1740,6 +1707,50 @@ def sanitize_to_nanoseconds(values: np.ndarray) -> np.ndarray:
return values


def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
    """
    Normalize a datetime64/timedelta64 dtype to nanosecond resolution.

    Units that can be safely cast to nanoseconds (e.g. ``[s]``) are replaced
    by the nanosecond dtype.  Units that cannot (e.g. ``[ps]``) and unit-less
    ``datetime64``/``timedelta64`` dtypes are rejected.

    Parameters
    ----------
    dtype : DtypeObj
        A dtype exposing a ``subtype`` attribute (e.g. SparseDtype) is
        unwrapped to that subtype before being checked.

    Returns
    -------
    DtypeObj
        ``DT64NS_DTYPE`` or ``TD64NS_DTYPE`` when a conversion was needed;
        otherwise the (unwrapped) input dtype, unchanged.  Anything that is
        not a numpy "M"/"m" dtype is passed through as-is.

    Raises
    ------
    ValueError
        If the dtype is a unit-less ``datetime64`` or ``timedelta64``.
    TypeError
        If the dtype's unit cannot be safely cast to nanoseconds.

    Examples
    --------
    >>> ensure_nanosecond_dtype(np.dtype("M8[s]"))
    dtype('<M8[ns]')

    >>> ensure_nanosecond_dtype(np.dtype("m8[ps]"))
    Traceback (most recent call last):
        ...
    TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]]
    """
    # Built before unwrapping, so the message names the dtype as passed in.
    msg = (
        f"The '{dtype.name}' dtype has no unit. "
        f"Please pass in '{dtype.name}[ns]' instead."
    )

    # unpack e.g. SparseDtype
    dtype = getattr(dtype, "subtype", dtype)

    if not isinstance(dtype, np.dtype):
        # i.e. datetime64tz
        return dtype

    # Select the per-kind pieces once; the validation below is shared.
    if dtype.kind == "M":
        ns_dtype, limit = DT64NS_DTYPE, np.dtype("M8[ns]")
        unitless_name, label = "datetime64", "datetimelike"
    elif dtype.kind == "m":
        ns_dtype, limit = TD64NS_DTYPE, np.dtype("m8[ns]")
        unitless_name, label = "timedelta64", "timedeltalike"
    else:
        # not a datetime64/timedelta64 dtype: nothing to normalize
        return dtype

    if dtype == ns_dtype:
        # already nanosecond resolution
        return dtype

    # ``a <= b`` on numpy dtypes asks whether ``a`` can be safely cast to
    # ``b``.  Units finer than [ns] (e.g. [ps], [fs], [as]) cannot, so they
    # are rejected here rather than silently losing precision.
    if not (dtype <= limit):
        raise TypeError(f"cannot convert {label} to dtype [{dtype}]")

    # A unit-less dtype passes the cast check above but is ambiguous.
    if dtype.name == unitless_name:
        raise ValueError(msg)

    return ns_dtype


def find_common_type(types: List[DtypeObj]) -> DtypeObj:
"""
Find a common data type among the given dtypes.
Expand Down