From d7c2cf7182faf1cae74dc365537ea3db3fb52007 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 13 Apr 2020 07:25:43 -0700
Subject: [PATCH] REF: simplify concat_datetime

---
 pandas/core/arrays/datetimelike.py  | 11 ++--
 pandas/core/dtypes/concat.py        | 93 ++++++++-----------------------
 pandas/core/indexes/datetimelike.py |  8 +--
 3 files changed, 36 insertions(+), 76 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 30a34282889f8..ece92acae6461 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -723,7 +723,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
         return type(self)(new_values, dtype=self.dtype)
 
     @classmethod
-    def _concat_same_type(cls, to_concat):
+    def _concat_same_type(cls, to_concat, axis: int = 0):
 
         # do not pass tz to set because tzlocal cannot be hashed
         dtypes = {str(x.dtype) for x in to_concat}
@@ -733,14 +733,15 @@ def _concat_same_type(cls, to_concat):
         obj = to_concat[0]
         dtype = obj.dtype
 
-        values = np.concatenate([x.asi8 for x in to_concat])
+        i8values = [x.asi8 for x in to_concat]
+        values = np.concatenate(i8values, axis=axis)
 
-        if is_period_dtype(to_concat[0].dtype):
+        new_freq = None
+        if is_period_dtype(dtype):
             new_freq = obj.freq
-        else:
+        elif axis == 0:
             # GH 3232: If the concat result is evenly spaced, we can retain the
             # original frequency
-            new_freq = None
             to_concat = [x for x in to_concat if len(x)]
 
             if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 301c9bb7b3f5c..367cf25fe763b 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -4,11 +4,7 @@
 
 import numpy as np
 
-from pandas._libs import tslib, tslibs
-
 from pandas.core.dtypes.common import (
-    DT64NS_DTYPE,
-    TD64NS_DTYPE,
     is_bool_dtype,
     is_categorical_dtype,
     is_datetime64_dtype,
@@ -19,13 +15,7 @@
     is_sparse,
     is_timedelta64_dtype,
 )
-from pandas.core.dtypes.generic import (
-    ABCCategoricalIndex,
-    ABCDatetimeArray,
-    ABCIndexClass,
-    ABCRangeIndex,
-    ABCSeries,
-)
+from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries
 
 
 def get_dtype_kinds(l):
@@ -390,70 +380,39 @@ def concat_datetime(to_concat, axis=0, typs=None):
     if typs is None:
         typs = get_dtype_kinds(to_concat)
 
-    # multiple types, need to coerce to object
-    if len(typs) != 1:
-        return _concatenate_2d(
-            [_convert_datetimelike_to_object(x) for x in to_concat], axis=axis
-        )
-
-    # must be single dtype
-    if any(typ.startswith("datetime") for typ in typs):
-
-        if "datetime" in typs:
-            to_concat = [x.astype(np.int64, copy=False) for x in to_concat]
-            return _concatenate_2d(to_concat, axis=axis).view(DT64NS_DTYPE)
-        else:
-            # when to_concat has different tz, len(typs) > 1.
-            # thus no need to care
-            return _concat_datetimetz(to_concat)
-
-    elif "timedelta" in typs:
-        return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view(
-            TD64NS_DTYPE
-        )
-
-    elif any(typ.startswith("period") for typ in typs):
-        assert len(typs) == 1
-        cls = to_concat[0]
-        new_values = cls._concat_same_type(to_concat)
-        return new_values
-
+    to_concat = [_wrap_datetimelike(x) for x in to_concat]
+    single_dtype = len({x.dtype for x in to_concat}) == 1
 
-def _convert_datetimelike_to_object(x):
-    # coerce datetimelike array to object dtype
+    # multiple types, need to coerce to object
+    if not single_dtype:
+        # wrap_datetimelike ensures that astype(object) wraps in Timestamp/Timedelta
+        return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)
 
-    # if dtype is of datetimetz or timezone
-    if x.dtype.kind == DT64NS_DTYPE.kind:
-        if getattr(x, "tz", None) is not None:
-            x = np.asarray(x.astype(object))
-        else:
-            shape = x.shape
-            x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp")
-            x = x.reshape(shape)
+    if axis == 1:
+        # TODO(EA2D): kludge not necessary with 2D EAs
+        to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]
 
-    elif x.dtype == TD64NS_DTYPE:
-        shape = x.shape
-        x = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True)
-        x = x.reshape(shape)
+    result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)
 
-    return x
+    if result.ndim == 2 and is_extension_array_dtype(result.dtype):
+        # TODO(EA2D): kludge not necessary with 2D EAs
+        assert result.shape[0] == 1
+        result = result[0]
+    return result
 
 
-def _concat_datetimetz(to_concat, name=None):
+def _wrap_datetimelike(arr):
     """
-    concat DatetimeIndex with the same tz
-    all inputs must be DatetimeIndex
-    it is used in DatetimeIndex.append also
+    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
+
+    DTA/TDA handle .astype(object) correctly.
     """
-    # Right now, internals will pass a List[DatetimeArray] here
-    # for reductions like quantile. I would like to disentangle
-    # all this before we get here.
-    sample = to_concat[0]
-
-    if isinstance(sample, ABCIndexClass):
-        return sample._concat_same_dtype(to_concat, name=name)
-    elif isinstance(sample, ABCDatetimeArray):
-        return sample._concat_same_type(to_concat)
+    from pandas.core.construction import array as pd_array, extract_array
+
+    arr = extract_array(arr, extract_numpy=True)
+    if isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]:
+        arr = pd_array(arr)
+    return arr
 
 
 def _concat_sparse(to_concat, axis=0, typs=None):
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 25333b3a08dce..c15680a47d216 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -778,8 +778,8 @@ def _fast_union(self, other, sort=None):
             left, right = self, other
             left_start = left[0]
             loc = right.searchsorted(left_start, side="left")
-            right_chunk = right.values[:loc]
-            dates = concat_compat((left.values, right_chunk))
+            right_chunk = right._values[:loc]
+            dates = concat_compat([left._values, right_chunk])
             result = self._shallow_copy(dates)
             result._set_freq("infer")
             # TODO: can we infer that it has self.freq?
@@ -793,8 +793,8 @@ def _fast_union(self, other, sort=None):
         # concatenate
         if left_end < right_end:
             loc = right.searchsorted(left_end, side="right")
-            right_chunk = right.values[loc:]
-            dates = concat_compat((left.values, right_chunk))
+            right_chunk = right._values[loc:]
+            dates = concat_compat([left._values, right_chunk])
             result = self._shallow_copy(dates)
             result._set_freq("infer")
             # TODO: can we infer that it has self.freq?
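
For reviewers, a minimal sketch of how the refactored path behaves, not part of the patch. It assumes a pandas build with this change applied and imports concat_datetime from the private module pandas.core.dtypes.concat, which is shown in the diff above; everything else is standard pandas API.

    import numpy as np
    import pandas as pd

    from pandas.core.dtypes.concat import concat_datetime

    # Raw datetime64[ns] ndarrays: _wrap_datetimelike wraps them in DatetimeArray
    # before dispatching to DatetimeArray._concat_same_type(..., axis=axis).
    left = pd.date_range("2020-01-01", periods=3).values
    right = pd.date_range("2020-01-04", periods=3).values
    same = concat_datetime([left, right])
    print(type(same).__name__, same.dtype)   # single dtype -> datetime64[ns] result

    # Mixed tz-naive / tz-aware inputs no longer share a dtype, so the result
    # falls back to object via astype(object) on the wrapped arrays.
    aware = pd.date_range("2020-01-01", periods=3, tz="UTC")._values
    mixed = concat_datetime([left, aware])
    print(mixed.dtype)                        # object

The point of the refactor is visible here: raw datetime64/timedelta64 ndarrays are wrapped once up front, so a single _concat_same_type call replaces the separate tz-naive, tz-aware, timedelta, and period branches that concat_datetime previously carried.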