From d7c2cf7182faf1cae74dc365537ea3db3fb52007 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 13 Apr 2020 07:25:43 -0700
Subject: [PATCH] REF: simplify concat_datetime

---
 pandas/core/arrays/datetimelike.py  | 11 ++--
 pandas/core/dtypes/concat.py        | 93 ++++++++-----------------------
 pandas/core/indexes/datetimelike.py |  8 +--
 3 files changed, 36 insertions(+), 76 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 30a34282889f8..ece92acae6461 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -723,7 +723,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
         return type(self)(new_values, dtype=self.dtype)
 
     @classmethod
-    def _concat_same_type(cls, to_concat):
+    def _concat_same_type(cls, to_concat, axis: int = 0):
 
         # do not pass tz to set because tzlocal cannot be hashed
         dtypes = {str(x.dtype) for x in to_concat}
@@ -733,14 +733,15 @@ def _concat_same_type(cls, to_concat):
         obj = to_concat[0]
         dtype = obj.dtype
 
-        values = np.concatenate([x.asi8 for x in to_concat])
+        i8values = [x.asi8 for x in to_concat]
+        values = np.concatenate(i8values, axis=axis)
 
-        if is_period_dtype(to_concat[0].dtype):
+        new_freq = None
+        if is_period_dtype(dtype):
             new_freq = obj.freq
-        else:
+        elif axis == 0:
             # GH 3232: If the concat result is evenly spaced, we can retain the
             # original frequency
-            new_freq = None
             to_concat = [x for x in to_concat if len(x)]
 
             if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 301c9bb7b3f5c..367cf25fe763b 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -4,11 +4,7 @@
 
 import numpy as np
 
-from pandas._libs import tslib, tslibs
-
 from pandas.core.dtypes.common import (
-    DT64NS_DTYPE,
-    TD64NS_DTYPE,
     is_bool_dtype,
     is_categorical_dtype,
     is_datetime64_dtype,
@@ -19,13 +15,7 @@
     is_sparse,
     is_timedelta64_dtype,
 )
-from pandas.core.dtypes.generic import (
-    ABCCategoricalIndex,
-    ABCDatetimeArray,
-    ABCIndexClass,
-    ABCRangeIndex,
-    ABCSeries,
-)
+from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries
 
 
 def get_dtype_kinds(l):
@@ -390,70 +380,39 @@ def concat_datetime(to_concat, axis=0, typs=None):
     if typs is None:
         typs = get_dtype_kinds(to_concat)
 
-    # multiple types, need to coerce to object
-    if len(typs) != 1:
-        return _concatenate_2d(
-            [_convert_datetimelike_to_object(x) for x in to_concat], axis=axis
-        )
-
-    # must be single dtype
-    if any(typ.startswith("datetime") for typ in typs):
-
-        if "datetime" in typs:
-            to_concat = [x.astype(np.int64, copy=False) for x in to_concat]
-            return _concatenate_2d(to_concat, axis=axis).view(DT64NS_DTYPE)
-        else:
-            # when to_concat has different tz, len(typs) > 1.
-            # thus no need to care
-            return _concat_datetimetz(to_concat)
-
-    elif "timedelta" in typs:
-        return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view(
-            TD64NS_DTYPE
-        )
-
-    elif any(typ.startswith("period") for typ in typs):
-        assert len(typs) == 1
-        cls = to_concat[0]
-        new_values = cls._concat_same_type(to_concat)
-        return new_values
-
+    to_concat = [_wrap_datetimelike(x) for x in to_concat]
+    single_dtype = len({x.dtype for x in to_concat}) == 1
 
-def _convert_datetimelike_to_object(x):
-    # coerce datetimelike array to object dtype
+    # multiple types, need to coerce to object
+    if not single_dtype:
+        # wrap_datetimelike ensures that astype(object) wraps in Timestamp/Timedelta
+        return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)
 
-    # if dtype is of datetimetz or timezone
-    if x.dtype.kind == DT64NS_DTYPE.kind:
-        if getattr(x, "tz", None) is not None:
-            x = np.asarray(x.astype(object))
-        else:
-            shape = x.shape
-            x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp")
-            x = x.reshape(shape)
+    if axis == 1:
+        # TODO(EA2D): kludge not necessary with 2D EAs
+        to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]
 
-    elif x.dtype == TD64NS_DTYPE:
-        shape = x.shape
-        x = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True)
-        x = x.reshape(shape)
+    result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)
 
-    return x
+    if result.ndim == 2 and is_extension_array_dtype(result.dtype):
+        # TODO(EA2D): kludge not necessary with 2D EAs
+        assert result.shape[0] == 1
+        result = result[0]
+    return result
 
 
-def _concat_datetimetz(to_concat, name=None):
+def _wrap_datetimelike(arr):
     """
-    concat DatetimeIndex with the same tz
-    all inputs must be DatetimeIndex
-    it is used in DatetimeIndex.append also
+    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
+
+    DTA/TDA handle .astype(object) correctly.
     """
-    # Right now, internals will pass a List[DatetimeArray] here
-    # for reductions like quantile. I would like to disentangle
-    # all this before we get here.
-    sample = to_concat[0]
-
-    if isinstance(sample, ABCIndexClass):
-        return sample._concat_same_dtype(to_concat, name=name)
-    elif isinstance(sample, ABCDatetimeArray):
-        return sample._concat_same_type(to_concat)
+    from pandas.core.construction import array as pd_array, extract_array
+
+    arr = extract_array(arr, extract_numpy=True)
+    if isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]:
+        arr = pd_array(arr)
+    return arr
 
 
 def _concat_sparse(to_concat, axis=0, typs=None):
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 25333b3a08dce..c15680a47d216 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -778,8 +778,8 @@ def _fast_union(self, other, sort=None):
             left, right = self, other
             left_start = left[0]
             loc = right.searchsorted(left_start, side="left")
-            right_chunk = right.values[:loc]
-            dates = concat_compat((left.values, right_chunk))
+            right_chunk = right._values[:loc]
+            dates = concat_compat([left._values, right_chunk])
             result = self._shallow_copy(dates)
             result._set_freq("infer")
             # TODO: can we infer that it has self.freq?
@@ -793,8 +793,8 @@ def _fast_union(self, other, sort=None):
         # concatenate
         if left_end < right_end:
             loc = right.searchsorted(left_end, side="right")
-            right_chunk = right.values[loc:]
-            dates = concat_compat((left.values, right_chunk))
+            right_chunk = right._values[loc:]
+            dates = concat_compat([left._values, right_chunk])
             result = self._shallow_copy(dates)
             result._set_freq("infer")
             # TODO: can we infer that it has self.freq?
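
For reviewers, a minimal sketch of how the refactored path behaves, not part of the patch. It assumes a pandas build with this change applied and imports concat_datetime from the private module pandas.core.dtypes.concat, which is shown in the diff above; everything else is standard pandas API.

    import numpy as np
    import pandas as pd

    from pandas.core.dtypes.concat import concat_datetime

    # Raw datetime64[ns] ndarrays: _wrap_datetimelike wraps them in DatetimeArray
    # before dispatching to DatetimeArray._concat_same_type(..., axis=axis).
    left = pd.date_range("2020-01-01", periods=3).values
    right = pd.date_range("2020-01-04", periods=3).values
    same = concat_datetime([left, right])
    print(type(same).__name__, same.dtype)   # single dtype -> datetime64[ns] result

    # Mixed tz-naive / tz-aware inputs no longer share a dtype, so the result
    # falls back to object via astype(object) on the wrapped arrays.
    aware = pd.date_range("2020-01-01", periods=3, tz="UTC")._values
    mixed = concat_datetime([left, aware])
    print(mixed.dtype)                        # object

The point of the refactor is visible here: raw datetime64/timedelta64 ndarrays are wrapped once up front, so a single _concat_same_type call replaces the separate tz-naive, tz-aware, timedelta, and period branches that concat_datetime previously carried.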