
Revert "REF: Back DatetimeTZBlock with sometimes-2D DTA" #41110

Closed · wants to merge 1 commit

27 changes: 0 additions & 27 deletions pandas/core/dtypes/common.py
@@ -1413,33 +1413,6 @@ def is_extension_type(arr) -> bool:
return False


def is_1d_only_ea_obj(obj: Any) -> bool:
"""
ExtensionArray that does not support 2D, or more specifically that does
not use HybridBlock.
"""
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
TimedeltaArray,
)

return isinstance(obj, ExtensionArray) and not isinstance(
obj, (DatetimeArray, TimedeltaArray)
)


def is_1d_only_ea_dtype(dtype: Optional[DtypeObj]) -> bool:
"""
Analogue to is_extension_array_dtype but excluding DatetimeTZDtype.
"""
# Note: if other EA dtypes are ever held in HybridBlock, exclude those
# here too.
# NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype
# to exclude ArrowTimestampUSDtype
return isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype)


def is_extension_array_dtype(arr_or_dtype) -> bool:
"""
Check if an object is a pandas extension array type.
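For context, a quick usage sketch of the two helpers this commit removes, assuming a pandas build where they are still importable (i.e. the branch being reverted):

```python
import pandas as pd
from pandas.core.dtypes.common import is_1d_only_ea_dtype, is_1d_only_ea_obj

# Categorical is 1D-only; tz-aware datetimes are excluded because their
# DatetimeArray backing can be reshaped to 2D inside the block machinery.
print(is_1d_only_ea_dtype(pd.CategoricalDtype()))                               # True
print(is_1d_only_ea_dtype(pd.DatetimeTZDtype(tz="UTC")))                        # False
print(is_1d_only_ea_obj(pd.array(["a", "b"], dtype="category")))                # True
print(is_1d_only_ea_obj(pd.array(pd.date_range("2021", periods=2, tz="UTC"))))  # False
```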
16 changes: 12 additions & 4 deletions pandas/core/dtypes/concat.py
@@ -113,15 +113,11 @@ def is_nonempty(x) -> bool:
to_concat = non_empties

kinds = {obj.dtype.kind for obj in to_concat}
contains_datetime = any(kind in ["m", "M"] for kind in kinds)

all_empty = not len(non_empties)
single_dtype = len({x.dtype for x in to_concat}) == 1
any_ea = any(isinstance(x.dtype, ExtensionDtype) for x in to_concat)

if contains_datetime:
return _concat_datetime(to_concat, axis=axis)

if any_ea:
# we ignore axis here, as internally concatting with EAs is always
# for axis=0
@@ -135,6 +131,9 @@ def is_nonempty(x) -> bool:
else:
return np.concatenate(to_concat)

elif any(kind in ["m", "M"] for kind in kinds):
return _concat_datetime(to_concat, axis=axis)

elif all_empty:
# we have all empties, but may need to coerce the result dtype to
# object if we have non-numeric type operands (numpy would otherwise
@@ -350,5 +349,14 @@ def _concat_datetime(to_concat, axis=0):
# in Timestamp/Timedelta
return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)

if axis == 1:
# TODO(EA2D): kludge not necessary with 2D EAs
to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]

result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)

if result.ndim == 2 and isinstance(result.dtype, ExtensionDtype):
# TODO(EA2D): kludge not necessary with 2D EAs
assert result.shape[0] == 1
result = result[0]
return result
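The restored `axis == 1` branch only shuffles shapes; here is a plain-NumPy sketch of the same mechanics (names are illustrative, not pandas API):

```python
import numpy as np

# 1D pieces are promoted to (1, n) rows, concatenated along axis=1, and the
# single resulting row is pulled back out, mirroring `result[0]` above.
parts = [np.arange(3), np.arange(2)]
parts2d = [p.reshape(1, -1) for p in parts]
stacked = np.concatenate(parts2d, axis=1)  # shape (1, 5)
flat = stacked[0]                          # shape (5,)
print(stacked.shape, flat.shape)
```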
5 changes: 1 addition & 4 deletions pandas/core/frame.py
@@ -98,7 +98,6 @@
from pandas.core.dtypes.common import (
ensure_platform_int,
infer_dtype_from_object,
is_1d_only_ea_dtype,
is_bool_dtype,
is_dataclass,
is_datetime64_any_dtype,
@@ -846,9 +845,7 @@ def _can_fast_transpose(self) -> bool:
if len(blocks) != 1:
return False

dtype = blocks[0].dtype
# TODO(EA2D) special case would be unnecessary with 2D EAs
return not is_1d_only_ea_dtype(dtype)
return not self._mgr.any_extension_types

# ----------------------------------------------------------------------
# Rendering Methods
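A small sketch of the fast-transpose condition, peeking at the private property touched above; a single consolidated numpy-dtype block qualifies, while a 1D-only ExtensionArray column does not under either spelling of the check:

```python
import numpy as np
import pandas as pd

# One consolidated float64 block -> fast transpose; a Categorical column is
# backed by a 1D-only ExtensionArray, so it takes the slow per-column path.
df = pd.DataFrame(np.ones((3, 2)))
cat = pd.DataFrame({"a": pd.Categorical(["x", "y", "z"])})
print(df._can_fast_transpose, cat._can_fast_transpose)  # True False
```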
12 changes: 3 additions & 9 deletions pandas/core/internals/api.py
@@ -6,7 +6,7 @@
2) Use only functions exposed here (or in core.internals)

"""
from __future__ import annotations
from typing import Optional

import numpy as np

@@ -23,15 +23,14 @@
Block,
DatetimeTZBlock,
check_ndim,
ensure_block_shape,
extract_pandas_array,
get_block_type,
maybe_coerce_values,
)


def make_block(
values, placement, klass=None, ndim=None, dtype: Dtype | None = None
values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
) -> Block:
"""
This is a pseudo-public analogue to blocks.new_block.
@@ -49,29 +48,24 @@ def make_block(

values, dtype = extract_pandas_array(values, dtype, ndim)

needs_reshape = False
if klass is None:
dtype = dtype or values.dtype
klass = get_block_type(values, dtype)

elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
# pyarrow calls get here
values = DatetimeArray._simple_new(values, dtype=dtype)
needs_reshape = True

if not isinstance(placement, BlockPlacement):
placement = BlockPlacement(placement)

ndim = maybe_infer_ndim(values, placement, ndim)
if needs_reshape:
values = ensure_block_shape(values, ndim)

check_ndim(values, placement, ndim)
values = maybe_coerce_values(values)
return klass(values, ndim=ndim, placement=placement)


def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int:
def maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int:
"""
If `ndim` is not provided, infer it from placement and values.
"""
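For reference, a minimal call to the pseudo-public constructor changed here; this is a sketch only, since `make_block` is semi-internal and `klass`/`ndim` are inferred as in the code above:

```python
import numpy as np
from pandas.core.internals.api import make_block

# Wrap a (1, n) int64 ndarray as a Block occupying column 0; klass and ndim
# are inferred via get_block_type and maybe_infer_ndim when not passed.
values = np.arange(4, dtype="int64").reshape(1, 4)
blk = make_block(values, placement=slice(0, 1))
print(type(blk).__name__, blk.dtype, blk.shape)
```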
58 changes: 33 additions & 25 deletions pandas/core/internals/blocks.py
@@ -42,8 +42,6 @@
soft_convert_objects,
)
from pandas.core.dtypes.common import (
is_1d_only_ea_dtype,
is_1d_only_ea_obj,
is_categorical_dtype,
is_dtype_equal,
is_extension_array_dtype,
@@ -226,6 +224,7 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
# expected "ndarray")
return self.values # type: ignore[return-value]

@final
def get_block_values_for_json(self) -> np.ndarray:
"""
This is used in the JSON C code.
@@ -416,11 +415,7 @@ def _split_op_result(self, result) -> list[Block]:
# if we get a 2D ExtensionArray, we need to split it into 1D pieces
nbs = []
for i, loc in enumerate(self._mgr_locs):
if not is_1d_only_ea_obj(result):
vals = result[i : i + 1]
else:
vals = result[i]

vals = result[i]
block = self.make_block(values=vals, placement=loc)
nbs.append(block)
return nbs
@@ -1675,7 +1670,7 @@ class NumericBlock(NumpyBlock):
is_numeric = True


class NDArrayBackedExtensionBlock(libinternals.Block, EABackedBlock):
class NDArrayBackedExtensionBlock(EABackedBlock):
"""
Block backed by an NDArrayBackedExtensionArray
"""
@@ -1688,6 +1683,11 @@ def is_view(self) -> bool:
# check the ndarray values of the DatetimeIndex values
return self.values._ndarray.base is not None

def iget(self, key):
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
# TODO(EA2D): this can be removed if we ever have 2D EA
return self.values.reshape(self.shape)[key]

def setitem(self, indexer, value):
if not self._can_hold_element(value):
# TODO: general case needs casting logic.
@@ -1707,21 +1707,24 @@ def putmask(self, mask, new) -> list[Block]:
if not self._can_hold_element(new):
return self.astype(object).putmask(mask, new)

arr = self.values
# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.values.reshape(self.shape)
arr.T.putmask(mask, new)
return [self]

def where(self, other, cond, errors="raise") -> list[Block]:
# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.values
arr = self.values.reshape(self.shape)

cond = extract_bool_array(cond)

try:
res_values = arr.T.where(cond, other).T
except (ValueError, TypeError):
return Block.where(self, other, cond, errors=errors)
return super().where(other, cond, errors=errors)

# TODO(EA2D): reshape not needed with 2D EAs
res_values = res_values.reshape(self.values.shape)
nb = self.make_block_same_class(res_values)
return [nb]

@@ -1745,13 +1748,15 @@ def diff(self, n: int, axis: int = 0) -> list[Block]:
The arguments here are mimicking shift so they are called correctly
by apply.
"""
values = self.values
# TODO(EA2D): reshape not necessary with 2D EAs
values = self.values.reshape(self.shape)

new_values = values - values.shift(n, axis=axis)
return [self.make_block(new_values)]

def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
values = self.values
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
values = self.values.reshape(self.shape)
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
return [self.make_block_same_class(new_values)]
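The reshape calls reinstated in `putmask`/`where`/`diff`/`shift` all follow the same pattern; a sketch with a tz-aware array, assuming the 2D-capable `DatetimeArray.reshape` that this block code itself relies on:

```python
import pandas as pd

# Values are stored 1D, but the block reports shape (1, n): methods reshape
# up front, operate, then pull the single row back out (cf. iget above).
arr = pd.array(pd.date_range("2021-01-01", periods=3, tz="UTC"))  # DatetimeArray
arr2d = arr.reshape(1, -1)  # matches block.shape == (1, 3)
row = arr2d[0]              # back to the 1D array the block stores
print(arr2d.shape, row.shape)
```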

@@ -1771,27 +1776,31 @@ def fillna(
return [self.make_block_same_class(values=new_values)]


class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
class DatetimeLikeBlock(libinternals.Block, NDArrayBackedExtensionBlock):
"""Block for datetime64[ns], timedelta64[ns]."""

__slots__ = ()
is_numeric = False
values: DatetimeArray | TimedeltaArray

def get_block_values_for_json(self):
# Not necessary to override, but helps perf
return self.values._ndarray


class DatetimeTZBlock(DatetimeLikeBlock):
class DatetimeTZBlock(ExtensionBlock, NDArrayBackedExtensionBlock):
""" implement a datetime64 block with a tz attribute """

values: DatetimeArray

__slots__ = ()
is_extension = True
_validate_ndim = True
_can_consolidate = False
is_numeric = False

diff = NDArrayBackedExtensionBlock.diff
where = NDArrayBackedExtensionBlock.where
putmask = NDArrayBackedExtensionBlock.putmask
fillna = NDArrayBackedExtensionBlock.fillna

get_values = NDArrayBackedExtensionBlock.get_values

is_view = NDArrayBackedExtensionBlock.is_view


class ObjectBlock(NumpyBlock):
@@ -1958,7 +1967,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int):
f"values.ndim > ndim [{values.ndim} > {ndim}]"
)

elif not is_1d_only_ea_dtype(values.dtype):
elif isinstance(values.dtype, np.dtype):
# TODO(EA2D): special case not needed with 2D EAs
if values.ndim != ndim:
raise ValueError(
@@ -1972,7 +1981,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int):
)
elif ndim == 2 and len(placement) != 1:
# TODO(EA2D): special case unnecessary with 2D EAs
raise ValueError("need to split")
raise AssertionError("block.size != values.size")
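A sketch of the ndim check for numpy-dtype values; the placement is built directly here for illustration, and the exact message comes from the branch above:

```python
import numpy as np
from pandas._libs.internals import BlockPlacement
from pandas.core.internals.blocks import check_ndim

# A 1D numpy-dtype array cannot back a 2D block, so check_ndim raises.
try:
    check_ndim(np.arange(3), BlockPlacement(slice(0, 1)), ndim=2)
except ValueError as err:
    print(err)
```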


def extract_pandas_array(
@@ -2017,9 +2026,8 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
"""
Reshape if possible to have values.ndim == ndim.
"""

if values.ndim < ndim:
if not is_1d_only_ea_dtype(values.dtype):
if not is_extension_array_dtype(values.dtype):
# TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
# block.shape is incorrect for "2D" ExtensionArrays
# We can't, and don't need to, reshape.
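`ensure_block_shape` is the other half of the kludge; a sketch of the promotion it performs (the `Int64` case is passed through unchanged under either spelling of the dtype check):

```python
import numpy as np
import pandas as pd
from pandas.core.internals.blocks import ensure_block_shape

# A 1D numpy array is promoted to (1, n) so it can back a 2D block;
# a 1D-only ExtensionArray is returned as-is.
print(ensure_block_shape(np.arange(4), ndim=2).shape)                     # (1, 4)
print(ensure_block_shape(pd.array([1, 2], dtype="Int64"), ndim=2).shape)  # (2,)
```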
27 changes: 9 additions & 18 deletions pandas/core/internals/concat.py
@@ -5,7 +5,6 @@
from typing import (
TYPE_CHECKING,
Sequence,
cast,
)

import numpy as np
@@ -24,8 +23,6 @@
find_common_type,
)
from pandas.core.dtypes.common import (
is_1d_only_ea_dtype,
is_1d_only_ea_obj,
is_datetime64tz_dtype,
is_dtype_equal,
is_extension_array_dtype,
@@ -213,8 +210,8 @@ def concatenate_managers(
values = np.concatenate(vals, axis=blk.ndim - 1)
else:
# TODO(EA2D): special-casing not needed with 2D EAs
values = concat_compat(vals, axis=1)
values = ensure_block_shape(values, blk.ndim)
values = concat_compat(vals)
values = ensure_block_shape(values, ndim=2)

values = ensure_wrapped_if_datetimelike(values)

@@ -415,16 +412,13 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
fill_value = None

if is_datetime64tz_dtype(empty_dtype):
i8values = np.full(self.shape, fill_value.value)
# TODO(EA2D): special case unneeded with 2D EAs
i8values = np.full(self.shape[1], fill_value.value)
return DatetimeArray(i8values, dtype=empty_dtype)

elif is_extension_array_dtype(blk_dtype):
pass

elif is_1d_only_ea_dtype(empty_dtype):
empty_dtype = cast(ExtensionDtype, empty_dtype)
elif isinstance(empty_dtype, ExtensionDtype):
cls = empty_dtype.construct_array_type()

missing_arr = cls._from_sequence([], dtype=empty_dtype)
ncols, nrows = self.shape
assert ncols == 1, ncols
@@ -435,7 +429,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
else:
# NB: we should never get here with empty_dtype integer or bool;
# if we did, the missing_arr.fill would cast to gibberish
empty_dtype = cast(np.dtype, empty_dtype)

missing_arr = np.empty(self.shape, dtype=empty_dtype)
missing_arr.fill(fill_value)
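The tz-aware branch above builds its all-NaT filler directly from i8 values; a sketch with an assumed length of 3:

```python
import numpy as np
import pandas as pd
from pandas.core.arrays import DatetimeArray

# An all-iNaT int64 array wrapped as a DatetimeArray with the target
# tz-aware dtype, mirroring the is_datetime64tz_dtype branch above.
empty_dtype = pd.DatetimeTZDtype(tz="UTC")
i8values = np.full(3, pd.NaT.value)
missing = DatetimeArray(i8values, dtype=empty_dtype)
print(missing.dtype, len(missing))  # datetime64[ns, UTC] 3
```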
@@ -500,17 +493,15 @@ def _concatenate_join_units(
concat_values = concat_values.copy()
else:
concat_values = concat_values.copy()

elif any(is_1d_only_ea_obj(t) for t in to_concat):
# TODO(EA2D): special case not needed if all EAs used HybridBlocks
# NB: we are still assuming here that Hybrid blocks have shape (1, N)
elif any(isinstance(t, ExtensionArray) and t.ndim == 1 for t in to_concat):
# concatting with at least one EA means we are concatting a single column
# the non-EA values are 2D arrays with shape (1, n)

# error: Invalid index type "Tuple[int, slice]" for
# "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]"
to_concat = [
t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[index]
t
if (isinstance(t, ExtensionArray) and t.ndim == 1)
else t[0, :] # type: ignore[index]
for t in to_concat
]
concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)