
Revert "REF: Back DatetimeTZBlock with sometimes-2D DTA" #41110

Closed · wants to merge 1 commit

27 changes: 0 additions & 27 deletions pandas/core/dtypes/common.py
@@ -1413,33 +1413,6 @@ def is_extension_type(arr) -> bool:
return False


def is_1d_only_ea_obj(obj: Any) -> bool:
"""
ExtensionArray that does not support 2D, or more specifically that does
not use HybridBlock.
"""
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
TimedeltaArray,
)

return isinstance(obj, ExtensionArray) and not isinstance(
obj, (DatetimeArray, TimedeltaArray)
)


def is_1d_only_ea_dtype(dtype: Optional[DtypeObj]) -> bool:
"""
Analogue to is_extension_array_dtype but excluding DatetimeTZDtype.
"""
# Note: if other EA dtypes are ever held in HybridBlock, exclude those
# here too.
# NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype
# to exclude ArrowTimestampUSDtype
return isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype)


def is_extension_array_dtype(arr_or_dtype) -> bool:
"""
Check if an object is a pandas extension array type.
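For context, a quick usage sketch of the two helpers this commit removes, assuming a pandas build where they are still importable (i.e. the branch being reverted):

```python
import pandas as pd
from pandas.core.dtypes.common import is_1d_only_ea_dtype, is_1d_only_ea_obj

# Categorical is 1D-only; tz-aware datetimes are excluded because their
# DatetimeArray backing can be reshaped to 2D inside the block machinery.
print(is_1d_only_ea_dtype(pd.CategoricalDtype()))                               # True
print(is_1d_only_ea_dtype(pd.DatetimeTZDtype(tz="UTC")))                        # False
print(is_1d_only_ea_obj(pd.array(["a", "b"], dtype="category")))                # True
print(is_1d_only_ea_obj(pd.array(pd.date_range("2021", periods=2, tz="UTC"))))  # False
```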
16 changes: 12 additions & 4 deletions pandas/core/dtypes/concat.py
@@ -113,15 +113,11 @@ def is_nonempty(x) -> bool:
to_concat = non_empties

kinds = {obj.dtype.kind for obj in to_concat}
contains_datetime = any(kind in ["m", "M"] for kind in kinds)

all_empty = not len(non_empties)
single_dtype = len({x.dtype for x in to_concat}) == 1
any_ea = any(isinstance(x.dtype, ExtensionDtype) for x in to_concat)

if contains_datetime:
return _concat_datetime(to_concat, axis=axis)

if any_ea:
# we ignore axis here, as internally concatting with EAs is always
# for axis=0
@@ -135,6 +131,9 @@ def is_nonempty(x) -> bool:
else:
return np.concatenate(to_concat)

elif any(kind in ["m", "M"] for kind in kinds):
return _concat_datetime(to_concat, axis=axis)

elif all_empty:
# we have all empties, but may need to coerce the result dtype to
# object if we have non-numeric type operands (numpy would otherwise
@@ -350,5 +349,14 @@ def _concat_datetime(to_concat, axis=0):
# in Timestamp/Timedelta
return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)

if axis == 1:
# TODO(EA2D): kludge not necessary with 2D EAs
to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]

result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)

if result.ndim == 2 and isinstance(result.dtype, ExtensionDtype):
# TODO(EA2D): kludge not necessary with 2D EAs
assert result.shape[0] == 1
result = result[0]
return result
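The restored `axis == 1` branch only shuffles shapes; here is a plain-NumPy sketch of the same mechanics (names are illustrative, not pandas API):

```python
import numpy as np

# 1D pieces are promoted to (1, n) rows, concatenated along axis=1, and the
# single resulting row is pulled back out, mirroring `result[0]` above.
parts = [np.arange(3), np.arange(2)]
parts2d = [p.reshape(1, -1) for p in parts]
stacked = np.concatenate(parts2d, axis=1)  # shape (1, 5)
flat = stacked[0]                          # shape (5,)
print(stacked.shape, flat.shape)
```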
5 changes: 1 addition & 4 deletions pandas/core/frame.py
@@ -98,7 +98,6 @@
from pandas.core.dtypes.common import (
ensure_platform_int,
infer_dtype_from_object,
is_1d_only_ea_dtype,
is_bool_dtype,
is_dataclass,
is_datetime64_any_dtype,
@@ -846,9 +845,7 @@ def _can_fast_transpose(self) -> bool:
if len(blocks) != 1:
return False

dtype = blocks[0].dtype
# TODO(EA2D) special case would be unnecessary with 2D EAs
return not is_1d_only_ea_dtype(dtype)
return not self._mgr.any_extension_types

# ----------------------------------------------------------------------
# Rendering Methods
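A small sketch of the fast-transpose condition, peeking at the private property touched above; a single consolidated numpy-dtype block qualifies, while a 1D-only ExtensionArray column does not under either spelling of the check:

```python
import numpy as np
import pandas as pd

# One consolidated float64 block -> fast transpose; a Categorical column is
# backed by a 1D-only ExtensionArray, so it takes the slow per-column path.
df = pd.DataFrame(np.ones((3, 2)))
cat = pd.DataFrame({"a": pd.Categorical(["x", "y", "z"])})
print(df._can_fast_transpose, cat._can_fast_transpose)  # True False
```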
12 changes: 3 additions & 9 deletions pandas/core/internals/api.py
@@ -6,7 +6,7 @@
2) Use only functions exposed here (or in core.internals)

"""
from __future__ import annotations
from typing import Optional

import numpy as np

@@ -23,15 +23,14 @@
Block,
DatetimeTZBlock,
check_ndim,
ensure_block_shape,
extract_pandas_array,
get_block_type,
maybe_coerce_values,
)


def make_block(
values, placement, klass=None, ndim=None, dtype: Dtype | None = None
values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
) -> Block:
"""
This is a pseudo-public analogue to blocks.new_block.
@@ -49,29 +48,24 @@ def make_block(

values, dtype = extract_pandas_array(values, dtype, ndim)

needs_reshape = False
if klass is None:
dtype = dtype or values.dtype
klass = get_block_type(values, dtype)

elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
# pyarrow calls get here
values = DatetimeArray._simple_new(values, dtype=dtype)
needs_reshape = True

if not isinstance(placement, BlockPlacement):
placement = BlockPlacement(placement)

ndim = maybe_infer_ndim(values, placement, ndim)
if needs_reshape:
values = ensure_block_shape(values, ndim)

check_ndim(values, placement, ndim)
values = maybe_coerce_values(values)
return klass(values, ndim=ndim, placement=placement)


def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int:
def maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int:
"""
If `ndim` is not provided, infer it from placement and values.
"""
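For reference, a minimal call to the pseudo-public constructor changed here; this is a sketch only, since `make_block` is semi-internal and `klass`/`ndim` are inferred as in the code above:

```python
import numpy as np
from pandas.core.internals.api import make_block

# Wrap a (1, n) int64 ndarray as a Block occupying column 0; klass and ndim
# are inferred via get_block_type and maybe_infer_ndim when not passed.
values = np.arange(4, dtype="int64").reshape(1, 4)
blk = make_block(values, placement=slice(0, 1))
print(type(blk).__name__, blk.dtype, blk.shape)
```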
58 changes: 33 additions & 25 deletions pandas/core/internals/blocks.py
@@ -42,8 +42,6 @@
soft_convert_objects,
)
from pandas.core.dtypes.common import (
is_1d_only_ea_dtype,
is_1d_only_ea_obj,
is_categorical_dtype,
is_dtype_equal,
is_extension_array_dtype,
@@ -226,6 +224,7 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
# expected "ndarray")
return self.values # type: ignore[return-value]

@final
def get_block_values_for_json(self) -> np.ndarray:
"""
This is used in the JSON C code.
@@ -416,11 +415,7 @@ def _split_op_result(self, result) -> list[Block]:
# if we get a 2D ExtensionArray, we need to split it into 1D pieces
nbs = []
for i, loc in enumerate(self._mgr_locs):
if not is_1d_only_ea_obj(result):
vals = result[i : i + 1]
else:
vals = result[i]

vals = result[i]
block = self.make_block(values=vals, placement=loc)
nbs.append(block)
return nbs
@@ -1675,7 +1670,7 @@ class NumericBlock(NumpyBlock):
is_numeric = True


class NDArrayBackedExtensionBlock(libinternals.Block, EABackedBlock):
class NDArrayBackedExtensionBlock(EABackedBlock):
"""
Block backed by an NDArrayBackedExtensionArray
"""
@@ -1688,6 +1683,11 @@ def is_view(self) -> bool:
# check the ndarray values of the DatetimeIndex values
return self.values._ndarray.base is not None

def iget(self, key):
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
# TODO(EA2D): this can be removed if we ever have 2D EA
return self.values.reshape(self.shape)[key]

def setitem(self, indexer, value):
if not self._can_hold_element(value):
# TODO: general case needs casting logic.
@@ -1707,21 +1707,24 @@ def putmask(self, mask, new) -> list[Block]:
if not self._can_hold_element(new):
return self.astype(object).putmask(mask, new)

arr = self.values
# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.values.reshape(self.shape)
arr.T.putmask(mask, new)
return [self]

def where(self, other, cond, errors="raise") -> list[Block]:
# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.values
arr = self.values.reshape(self.shape)

cond = extract_bool_array(cond)

try:
res_values = arr.T.where(cond, other).T
except (ValueError, TypeError):
return Block.where(self, other, cond, errors=errors)
return super().where(other, cond, errors=errors)

# TODO(EA2D): reshape not needed with 2D EAs
res_values = res_values.reshape(self.values.shape)
nb = self.make_block_same_class(res_values)
return [nb]

@@ -1745,13 +1748,15 @@ def diff(self, n: int, axis: int = 0) -> list[Block]:
The arguments here are mimicking shift so they are called correctly
by apply.
"""
values = self.values
# TODO(EA2D): reshape not necessary with 2D EAs
values = self.values.reshape(self.shape)

new_values = values - values.shift(n, axis=axis)
return [self.make_block(new_values)]

def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
values = self.values
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
values = self.values.reshape(self.shape)
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
return [self.make_block_same_class(new_values)]
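The reshape calls reinstated in `putmask`/`where`/`diff`/`shift` all follow the same pattern; a sketch with a tz-aware array, assuming the 2D-capable `DatetimeArray.reshape` that this block code itself relies on:

```python
import pandas as pd

# Values are stored 1D, but the block reports shape (1, n): methods reshape
# up front, operate, then pull the single row back out (cf. iget above).
arr = pd.array(pd.date_range("2021-01-01", periods=3, tz="UTC"))  # DatetimeArray
arr2d = arr.reshape(1, -1)  # matches block.shape == (1, 3)
row = arr2d[0]              # back to the 1D array the block stores
print(arr2d.shape, row.shape)
```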

@@ -1771,27 +1776,31 @@ def fillna(
return [self.make_block_same_class(values=new_values)]


class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
class DatetimeLikeBlock(libinternals.Block, NDArrayBackedExtensionBlock):
"""Block for datetime64[ns], timedelta64[ns]."""

__slots__ = ()
is_numeric = False
values: DatetimeArray | TimedeltaArray

def get_block_values_for_json(self):
# Not necessary to override, but helps perf
return self.values._ndarray


class DatetimeTZBlock(DatetimeLikeBlock):
class DatetimeTZBlock(ExtensionBlock, NDArrayBackedExtensionBlock):
""" implement a datetime64 block with a tz attribute """

values: DatetimeArray

__slots__ = ()
is_extension = True
_validate_ndim = True
_can_consolidate = False
is_numeric = False

diff = NDArrayBackedExtensionBlock.diff
where = NDArrayBackedExtensionBlock.where
putmask = NDArrayBackedExtensionBlock.putmask
fillna = NDArrayBackedExtensionBlock.fillna

get_values = NDArrayBackedExtensionBlock.get_values

is_view = NDArrayBackedExtensionBlock.is_view


class ObjectBlock(NumpyBlock):
@@ -1958,7 +1967,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int):
f"values.ndim > ndim [{values.ndim} > {ndim}]"
)

elif not is_1d_only_ea_dtype(values.dtype):
elif isinstance(values.dtype, np.dtype):
# TODO(EA2D): special case not needed with 2D EAs
if values.ndim != ndim:
raise ValueError(
@@ -1972,7 +1981,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int):
)
elif ndim == 2 and len(placement) != 1:
# TODO(EA2D): special case unnecessary with 2D EAs
raise ValueError("need to split")
raise AssertionError("block.size != values.size")
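A sketch of the ndim check for numpy-dtype values; the placement is built directly here for illustration, and the exact message comes from the branch above:

```python
import numpy as np
from pandas._libs.internals import BlockPlacement
from pandas.core.internals.blocks import check_ndim

# A 1D numpy-dtype array cannot back a 2D block, so check_ndim raises.
try:
    check_ndim(np.arange(3), BlockPlacement(slice(0, 1)), ndim=2)
except ValueError as err:
    print(err)
```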


def extract_pandas_array(
@@ -2017,9 +2026,8 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
"""
Reshape if possible to have values.ndim == ndim.
"""

if values.ndim < ndim:
if not is_1d_only_ea_dtype(values.dtype):
if not is_extension_array_dtype(values.dtype):
# TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
# block.shape is incorrect for "2D" ExtensionArrays
# We can't, and don't need to, reshape.
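`ensure_block_shape` is the other half of the kludge; a sketch of the promotion it performs (the `Int64` case is passed through unchanged under either spelling of the dtype check):

```python
import numpy as np
import pandas as pd
from pandas.core.internals.blocks import ensure_block_shape

# A 1D numpy array is promoted to (1, n) so it can back a 2D block;
# a 1D-only ExtensionArray is returned as-is.
print(ensure_block_shape(np.arange(4), ndim=2).shape)                     # (1, 4)
print(ensure_block_shape(pd.array([1, 2], dtype="Int64"), ndim=2).shape)  # (2,)
```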
27 changes: 9 additions & 18 deletions pandas/core/internals/concat.py
@@ -5,7 +5,6 @@
from typing import (
TYPE_CHECKING,
Sequence,
cast,
)

import numpy as np
@@ -24,8 +23,6 @@
find_common_type,
)
from pandas.core.dtypes.common import (
is_1d_only_ea_dtype,
is_1d_only_ea_obj,
is_datetime64tz_dtype,
is_dtype_equal,
is_extension_array_dtype,
@@ -213,8 +210,8 @@ def concatenate_managers(
values = np.concatenate(vals, axis=blk.ndim - 1)
else:
# TODO(EA2D): special-casing not needed with 2D EAs
values = concat_compat(vals, axis=1)
values = ensure_block_shape(values, blk.ndim)
values = concat_compat(vals)
values = ensure_block_shape(values, ndim=2)

values = ensure_wrapped_if_datetimelike(values)

@@ -415,16 +412,13 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
fill_value = None

if is_datetime64tz_dtype(empty_dtype):
i8values = np.full(self.shape, fill_value.value)
# TODO(EA2D): special case unneeded with 2D EAs
i8values = np.full(self.shape[1], fill_value.value)
return DatetimeArray(i8values, dtype=empty_dtype)

elif is_extension_array_dtype(blk_dtype):
pass

elif is_1d_only_ea_dtype(empty_dtype):
empty_dtype = cast(ExtensionDtype, empty_dtype)
elif isinstance(empty_dtype, ExtensionDtype):
cls = empty_dtype.construct_array_type()

missing_arr = cls._from_sequence([], dtype=empty_dtype)
ncols, nrows = self.shape
assert ncols == 1, ncols
@@ -435,7 +429,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
else:
# NB: we should never get here with empty_dtype integer or bool;
# if we did, the missing_arr.fill would cast to gibberish
empty_dtype = cast(np.dtype, empty_dtype)

missing_arr = np.empty(self.shape, dtype=empty_dtype)
missing_arr.fill(fill_value)
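The tz-aware branch above builds its all-NaT filler directly from i8 values; a sketch with an assumed length of 3:

```python
import numpy as np
import pandas as pd
from pandas.core.arrays import DatetimeArray

# An all-iNaT int64 array wrapped as a DatetimeArray with the target
# tz-aware dtype, mirroring the is_datetime64tz_dtype branch above.
empty_dtype = pd.DatetimeTZDtype(tz="UTC")
i8values = np.full(3, pd.NaT.value)
missing = DatetimeArray(i8values, dtype=empty_dtype)
print(missing.dtype, len(missing))  # datetime64[ns, UTC] 3
```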
@@ -500,17 +493,15 @@ def _concatenate_join_units(
concat_values = concat_values.copy()
else:
concat_values = concat_values.copy()

elif any(is_1d_only_ea_obj(t) for t in to_concat):
# TODO(EA2D): special case not needed if all EAs used HybridBlocks
# NB: we are still assuming here that Hybrid blocks have shape (1, N)
elif any(isinstance(t, ExtensionArray) and t.ndim == 1 for t in to_concat):
# concatting with at least one EA means we are concatting a single column
# the non-EA values are 2D arrays with shape (1, n)

# error: Invalid index type "Tuple[int, slice]" for
# "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]"
to_concat = [
t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[index]
t
if (isinstance(t, ExtensionArray) and t.ndim == 1)
else t[0, :] # type: ignore[index]
for t in to_concat
]
concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)