Skip to content

Commit c500987

Browse files
authored
REF: split out to_datetime_with_unit, _to_datetime_with_format (#40185)
1 parent 579b75a commit c500987

File tree

1 file changed

+97
-65
lines changed

1 file changed

+97
-65
lines changed

pandas/core/tools/datetimes.py

Lines changed: 97 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -346,38 +346,7 @@ def _convert_listlike_datetimes(
346346
elif unit is not None:
347347
if format is not None:
348348
raise ValueError("cannot specify both format and unit")
349-
arg = getattr(arg, "_values", arg)
350-
351-
# GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
352-
# because it expects an ndarray argument
353-
if isinstance(arg, IntegerArray):
354-
result = arg.astype(f"datetime64[{unit}]")
355-
tz_parsed = None
356-
else:
357-
358-
result, tz_parsed = tslib.array_with_unit_to_datetime(
359-
arg, unit, errors=errors
360-
)
361-
362-
if errors == "ignore":
363-
364-
result = Index(result, name=name)
365-
else:
366-
result = DatetimeIndex(result, name=name)
367-
# GH 23758: We may still need to localize the result with tz
368-
# GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
369-
# result will be naive but in UTC
370-
try:
371-
result = result.tz_localize("UTC").tz_convert(tz_parsed)
372-
except AttributeError:
373-
# Regular Index from 'ignore' path
374-
return result
375-
if tz is not None:
376-
if result.tz is None:
377-
result = result.tz_localize(tz)
378-
else:
379-
result = result.tz_convert(tz)
380-
return result
349+
return _to_datetime_with_unit(arg, unit, name, tz, errors)
381350
elif getattr(arg, "ndim", 1) > 1:
382351
raise TypeError(
383352
"arg must be a string, datetime, list, tuple, 1-d array, or Series"
@@ -416,36 +385,11 @@ def _convert_listlike_datetimes(
416385
result = None
417386

418387
if format is not None:
419-
try:
420-
# shortcut formatting here
421-
if format == "%Y%m%d":
422-
# pass orig_arg as float-dtype may have been converted to
423-
# datetime64[ns]
424-
orig_arg = ensure_object(orig_arg)
425-
try:
426-
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
427-
except (ValueError, TypeError, OutOfBoundsDatetime) as err:
428-
raise ValueError(
429-
"cannot convert the input to '%Y%m%d' date format"
430-
) from err
431-
432-
# fallback
433-
if result is None:
434-
result = _array_strptime_with_fallback(
435-
arg, name, tz, format, exact, errors, infer_datetime_format
436-
)
437-
if result is not None:
438-
return result
439-
440-
except ValueError as e:
441-
# Fallback to try to convert datetime objects if timezone-aware
442-
# datetime objects are found without passing `utc=True`
443-
try:
444-
values, tz = conversion.datetime_to_datetime64(arg)
445-
dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
446-
return DatetimeIndex._simple_new(dta, name=name)
447-
except (ValueError, TypeError):
448-
raise e
388+
result = _to_datetime_with_format(
389+
arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
390+
)
391+
if result is not None:
392+
return result
449393

450394
if result is None:
451395
assert format is None or infer_datetime_format
@@ -517,6 +461,94 @@ def _array_strptime_with_fallback(
517461
return _box_as_indexlike(result, utc=utc, name=name)
518462

519463

464+
def _to_datetime_with_format(
465+
arg,
466+
orig_arg,
467+
name,
468+
tz,
469+
fmt: str,
470+
exact: bool,
471+
errors: Optional[str],
472+
infer_datetime_format: bool,
473+
) -> Optional[Index]:
474+
"""
475+
Try parsing with the given format, returning None on failure.
476+
"""
477+
result = None
478+
try:
479+
# shortcut formatting here
480+
if fmt == "%Y%m%d":
481+
# pass orig_arg as float-dtype may have been converted to
482+
# datetime64[ns]
483+
orig_arg = ensure_object(orig_arg)
484+
try:
485+
# may return None without raising
486+
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
487+
except (ValueError, TypeError, OutOfBoundsDatetime) as err:
488+
raise ValueError(
489+
"cannot convert the input to '%Y%m%d' date format"
490+
) from err
491+
if result is not None:
492+
utc = tz == "utc"
493+
return _box_as_indexlike(result, utc=utc, name=name)
494+
495+
# fallback
496+
if result is None:
497+
result = _array_strptime_with_fallback(
498+
arg, name, tz, fmt, exact, errors, infer_datetime_format
499+
)
500+
if result is not None:
501+
return result
502+
503+
except ValueError as e:
504+
# Fallback to try to convert datetime objects if timezone-aware
505+
# datetime objects are found without passing `utc=True`
506+
try:
507+
values, tz = conversion.datetime_to_datetime64(arg)
508+
dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
509+
return DatetimeIndex._simple_new(dta, name=name)
510+
except (ValueError, TypeError):
511+
raise e
512+
513+
return result
514+
515+
516+
def _to_datetime_with_unit(arg, unit, name, tz, errors: Optional[str]) -> Index:
517+
"""
518+
to_datetime specialized to the case where a 'unit' is passed.
519+
"""
520+
arg = getattr(arg, "_values", arg)
521+
522+
# GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime
523+
# because it expects an ndarray argument
524+
if isinstance(arg, IntegerArray):
525+
result = arg.astype(f"datetime64[{unit}]")
526+
tz_parsed = None
527+
else:
528+
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
529+
530+
if errors == "ignore":
531+
# Index constructor _may_ infer to DatetimeIndex
532+
result = Index(result, name=name)
533+
else:
534+
result = DatetimeIndex(result, name=name)
535+
536+
if not isinstance(result, DatetimeIndex):
537+
return result
538+
539+
# GH#23758: We may still need to localize the result with tz
540+
# GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
541+
# result will be naive but in UTC
542+
result = result.tz_localize("UTC").tz_convert(tz_parsed)
543+
544+
if tz is not None:
545+
if result.tz is None:
546+
result = result.tz_localize(tz)
547+
else:
548+
result = result.tz_convert(tz)
549+
return result
550+
551+
520552
def _adjust_to_origin(arg, origin, unit):
521553
"""
522554
Helper function for to_datetime.
@@ -987,16 +1019,16 @@ def coerce(values):
9871019
return values
9881020

9891021

990-
def _attempt_YYYYMMDD(arg, errors):
1022+
def _attempt_YYYYMMDD(arg: np.ndarray, errors: Optional[str]) -> Optional[np.ndarray]:
9911023
"""
9921024
try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
9931025
arg is passed in as an object dtype, but could really be ints/strings
9941026
with nan-like/or floats (e.g. with nan)
9951027
9961028
Parameters
9971029
----------
998-
arg : passed value
999-
errors : 'raise','ignore','coerce'
1030+
arg : np.ndarray[object]
1031+
errors : {'raise','ignore','coerce'}
10001032
"""
10011033

10021034
def calc(carg):

0 commit comments

Comments
 (0)