From 79660d71b56d33673edb4078962a744257200133 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 22 Apr 2021 18:26:37 -0700 Subject: [PATCH] Revert "REF: Back DatetimeTZBlock with sometimes-2D DTA (#41082)" This reverts commit 6b6d8fd073653219343de624a6cd513b09dae4c1. --- pandas/core/dtypes/common.py | 27 --------- pandas/core/dtypes/concat.py | 16 ++++-- pandas/core/frame.py | 5 +- pandas/core/internals/api.py | 12 +--- pandas/core/internals/blocks.py | 58 +++++++++++--------- pandas/core/internals/concat.py | 27 +++------ pandas/core/internals/construction.py | 54 +++++++++--------- pandas/core/internals/managers.py | 44 +++------------ pandas/core/reshape/reshape.py | 3 +- pandas/tests/arrays/test_datetimes.py | 10 ---- pandas/tests/extension/base/constructors.py | 13 ++--- pandas/tests/frame/methods/test_set_index.py | 7 +-- pandas/tests/frame/test_block_internals.py | 2 +- pandas/tests/internals/test_internals.py | 6 +- pandas/tests/series/test_constructors.py | 2 +- 15 files changed, 103 insertions(+), 183 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 593e42f7ed749..e207dac71752e 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1413,33 +1413,6 @@ def is_extension_type(arr) -> bool: return False -def is_1d_only_ea_obj(obj: Any) -> bool: - """ - ExtensionArray that does not support 2D, or more specifically that does - not use HybridBlock. - """ - from pandas.core.arrays import ( - DatetimeArray, - ExtensionArray, - TimedeltaArray, - ) - - return isinstance(obj, ExtensionArray) and not isinstance( - obj, (DatetimeArray, TimedeltaArray) - ) - - -def is_1d_only_ea_dtype(dtype: Optional[DtypeObj]) -> bool: - """ - Analogue to is_extension_array_dtype but excluding DatetimeTZDtype. - """ - # Note: if other EA dtypes are ever held in HybridBlock, exclude those - # here too. - # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype - # to exclude ArrowTimestampUSDtype - return isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype) - - def is_extension_array_dtype(arr_or_dtype) -> bool: """ Check if an object is a pandas extension array type. 
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b0d00775bbed1..cfadb3e9f45c5 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -113,15 +113,11 @@ def is_nonempty(x) -> bool: to_concat = non_empties kinds = {obj.dtype.kind for obj in to_concat} - contains_datetime = any(kind in ["m", "M"] for kind in kinds) all_empty = not len(non_empties) single_dtype = len({x.dtype for x in to_concat}) == 1 any_ea = any(isinstance(x.dtype, ExtensionDtype) for x in to_concat) - if contains_datetime: - return _concat_datetime(to_concat, axis=axis) - if any_ea: # we ignore axis here, as internally concatting with EAs is always # for axis=0 @@ -135,6 +131,9 @@ def is_nonempty(x) -> bool: else: return np.concatenate(to_concat) + elif any(kind in ["m", "M"] for kind in kinds): + return _concat_datetime(to_concat, axis=axis) + elif all_empty: # we have all empties, but may need to coerce the result dtype to # object if we have non-numeric type operands (numpy would otherwise @@ -350,5 +349,14 @@ def _concat_datetime(to_concat, axis=0): # in Timestamp/Timedelta return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis) + if axis == 1: + # TODO(EA2D): kludge not necessary with 2D EAs + to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat] + result = type(to_concat[0])._concat_same_type(to_concat, axis=axis) + + if result.ndim == 2 and isinstance(result.dtype, ExtensionDtype): + # TODO(EA2D): kludge not necessary with 2D EAs + assert result.shape[0] == 1 + result = result[0] return result diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7f970a72cb12c..4a7ed2bfc18df 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -98,7 +98,6 @@ from pandas.core.dtypes.common import ( ensure_platform_int, infer_dtype_from_object, - is_1d_only_ea_dtype, is_bool_dtype, is_dataclass, is_datetime64_any_dtype, @@ -846,9 +845,7 @@ def _can_fast_transpose(self) -> bool: if len(blocks) != 1: return False - dtype = blocks[0].dtype - # TODO(EA2D) special case would be unnecessary with 2D EAs - return not is_1d_only_ea_dtype(dtype) + return not self._mgr.any_extension_types # ---------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/internals/api.py b/pandas/core/internals/api.py index 2f8686fd38929..d6b76510c68ab 100644 --- a/pandas/core/internals/api.py +++ b/pandas/core/internals/api.py @@ -6,7 +6,7 @@ 2) Use only functions exposed here (or in core.internals) """ -from __future__ import annotations +from typing import Optional import numpy as np @@ -23,7 +23,6 @@ Block, DatetimeTZBlock, check_ndim, - ensure_block_shape, extract_pandas_array, get_block_type, maybe_coerce_values, @@ -31,7 +30,7 @@ def make_block( - values, placement, klass=None, ndim=None, dtype: Dtype | None = None + values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None ) -> Block: """ This is a pseudo-public analogue to blocks.new_block. 
@@ -49,7 +48,6 @@ def make_block( values, dtype = extract_pandas_array(values, dtype, ndim) - needs_reshape = False if klass is None: dtype = dtype or values.dtype klass = get_block_type(values, dtype) @@ -57,21 +55,17 @@ def make_block( elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype): # pyarrow calls get here values = DatetimeArray._simple_new(values, dtype=dtype) - needs_reshape = True if not isinstance(placement, BlockPlacement): placement = BlockPlacement(placement) ndim = maybe_infer_ndim(values, placement, ndim) - if needs_reshape: - values = ensure_block_shape(values, ndim) - check_ndim(values, placement, ndim) values = maybe_coerce_values(values) return klass(values, ndim=ndim, placement=placement) -def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int: +def maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int: """ If `ndim` is not provided, infer it from placment and values. """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4276aadd8edd6..603cc6a6ff1f2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -42,8 +42,6 @@ soft_convert_objects, ) from pandas.core.dtypes.common import ( - is_1d_only_ea_dtype, - is_1d_only_ea_obj, is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, @@ -226,6 +224,7 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: # expected "ndarray") return self.values # type: ignore[return-value] + @final def get_block_values_for_json(self) -> np.ndarray: """ This is used in the JSON C code. @@ -416,11 +415,7 @@ def _split_op_result(self, result) -> list[Block]: # if we get a 2D ExtensionArray, we need to split it into 1D pieces nbs = [] for i, loc in enumerate(self._mgr_locs): - if not is_1d_only_ea_obj(result): - vals = result[i : i + 1] - else: - vals = result[i] - + vals = result[i] block = self.make_block(values=vals, placement=loc) nbs.append(block) return nbs @@ -1675,7 +1670,7 @@ class NumericBlock(NumpyBlock): is_numeric = True -class NDArrayBackedExtensionBlock(libinternals.Block, EABackedBlock): +class NDArrayBackedExtensionBlock(EABackedBlock): """ Block backed by an NDArrayBackedExtensionArray """ @@ -1688,6 +1683,11 @@ def is_view(self) -> bool: # check the ndarray values of the DatetimeIndex values return self.values._ndarray.base is not None + def iget(self, key): + # GH#31649 we need to wrap scalars in Timestamp/Timedelta + # TODO(EA2D): this can be removed if we ever have 2D EA + return self.values.reshape(self.shape)[key] + def setitem(self, indexer, value): if not self._can_hold_element(value): # TODO: general case needs casting logic. 
@@ -1707,21 +1707,24 @@ def putmask(self, mask, new) -> list[Block]: if not self._can_hold_element(new): return self.astype(object).putmask(mask, new) - arr = self.values + # TODO(EA2D): reshape unnecessary with 2D EAs + arr = self.values.reshape(self.shape) arr.T.putmask(mask, new) return [self] def where(self, other, cond, errors="raise") -> list[Block]: # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.values + arr = self.values.reshape(self.shape) cond = extract_bool_array(cond) try: res_values = arr.T.where(cond, other).T except (ValueError, TypeError): - return Block.where(self, other, cond, errors=errors) + return super().where(other, cond, errors=errors) + # TODO(EA2D): reshape not needed with 2D EAs + res_values = res_values.reshape(self.values.shape) nb = self.make_block_same_class(res_values) return [nb] @@ -1745,13 +1748,15 @@ def diff(self, n: int, axis: int = 0) -> list[Block]: The arguments here are mimicking shift so they are called correctly by apply. """ - values = self.values + # TODO(EA2D): reshape not necessary with 2D EAs + values = self.values.reshape(self.shape) new_values = values - values.shift(n, axis=axis) return [self.make_block(new_values)] def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: - values = self.values + # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs + values = self.values.reshape(self.shape) new_values = values.shift(periods, fill_value=fill_value, axis=axis) return [self.make_block_same_class(new_values)] @@ -1771,27 +1776,31 @@ def fillna( return [self.make_block_same_class(values=new_values)] -class DatetimeLikeBlock(NDArrayBackedExtensionBlock): +class DatetimeLikeBlock(libinternals.Block, NDArrayBackedExtensionBlock): """Block for datetime64[ns], timedelta64[ns].""" __slots__ = () is_numeric = False values: DatetimeArray | TimedeltaArray - def get_block_values_for_json(self): - # Not necessary to override, but helps perf - return self.values._ndarray - -class DatetimeTZBlock(DatetimeLikeBlock): +class DatetimeTZBlock(ExtensionBlock, NDArrayBackedExtensionBlock): """ implement a datetime64 block with a tz attribute """ values: DatetimeArray __slots__ = () is_extension = True - _validate_ndim = True - _can_consolidate = False + is_numeric = False + + diff = NDArrayBackedExtensionBlock.diff + where = NDArrayBackedExtensionBlock.where + putmask = NDArrayBackedExtensionBlock.putmask + fillna = NDArrayBackedExtensionBlock.fillna + + get_values = NDArrayBackedExtensionBlock.get_values + + is_view = NDArrayBackedExtensionBlock.is_view class ObjectBlock(NumpyBlock): @@ -1958,7 +1967,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int): f"values.ndim > ndim [{values.ndim} > {ndim}]" ) - elif not is_1d_only_ea_dtype(values.dtype): + elif isinstance(values.dtype, np.dtype): # TODO(EA2D): special case not needed with 2D EAs if values.ndim != ndim: raise ValueError( @@ -1972,7 +1981,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int): ) elif ndim == 2 and len(placement) != 1: # TODO(EA2D): special case unnecessary with 2D EAs - raise ValueError("need to split") + raise AssertionError("block.size != values.size") def extract_pandas_array( @@ -2017,9 +2026,8 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: """ Reshape if possible to have values.ndim == ndim. 
""" - if values.ndim < ndim: - if not is_1d_only_ea_dtype(values.dtype): + if not is_extension_array_dtype(values.dtype): # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 # block.shape is incorrect for "2D" ExtensionArrays # We can't, and don't need to, reshape. diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 51a381a1b7f4f..0b0013eeb7147 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -5,7 +5,6 @@ from typing import ( TYPE_CHECKING, Sequence, - cast, ) import numpy as np @@ -24,8 +23,6 @@ find_common_type, ) from pandas.core.dtypes.common import ( - is_1d_only_ea_dtype, - is_1d_only_ea_obj, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, @@ -213,8 +210,8 @@ def concatenate_managers( values = np.concatenate(vals, axis=blk.ndim - 1) else: # TODO(EA2D): special-casing not needed with 2D EAs - values = concat_compat(vals, axis=1) - values = ensure_block_shape(values, blk.ndim) + values = concat_compat(vals) + values = ensure_block_shape(values, ndim=2) values = ensure_wrapped_if_datetimelike(values) @@ -415,16 +412,13 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: fill_value = None if is_datetime64tz_dtype(empty_dtype): - i8values = np.full(self.shape, fill_value.value) + # TODO(EA2D): special case unneeded with 2D EAs + i8values = np.full(self.shape[1], fill_value.value) return DatetimeArray(i8values, dtype=empty_dtype) - elif is_extension_array_dtype(blk_dtype): pass - - elif is_1d_only_ea_dtype(empty_dtype): - empty_dtype = cast(ExtensionDtype, empty_dtype) + elif isinstance(empty_dtype, ExtensionDtype): cls = empty_dtype.construct_array_type() - missing_arr = cls._from_sequence([], dtype=empty_dtype) ncols, nrows = self.shape assert ncols == 1, ncols @@ -435,7 +429,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: else: # NB: we should never get here with empty_dtype integer or bool; # if we did, the missing_arr.fill would cast to gibberish - empty_dtype = cast(np.dtype, empty_dtype) missing_arr = np.empty(self.shape, dtype=empty_dtype) missing_arr.fill(fill_value) @@ -500,17 +493,15 @@ def _concatenate_join_units( concat_values = concat_values.copy() else: concat_values = concat_values.copy() - - elif any(is_1d_only_ea_obj(t) for t in to_concat): - # TODO(EA2D): special case not needed if all EAs used HybridBlocks - # NB: we are still assuming here that Hybrid blocks have shape (1, N) + elif any(isinstance(t, ExtensionArray) and t.ndim == 1 for t in to_concat): # concatting with at least one EA means we are concatting a single column # the non-EA values are 2D arrays with shape (1, n) - # error: Invalid index type "Tuple[int, slice]" for # "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]" to_concat = [ - t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[index] + t + if (isinstance(t, ExtensionArray) and t.ndim == 1) + else t[0, :] # type: ignore[index] for t in to_concat ] concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 83ecdbce5fa80..2960fb292818a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -32,7 +32,6 @@ maybe_upcast, ) from pandas.core.dtypes.common import ( - is_1d_only_ea_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, @@ -56,8 +55,7 @@ ) from pandas.core.arrays import ( Categorical, 
- ExtensionArray, - TimedeltaArray, + DatetimeArray, ) from pandas.core.construction import ( extract_array, @@ -261,8 +259,7 @@ def ndarray_to_mgr( if not len(values) and columns is not None and len(columns): values = np.empty((0, 1), dtype=object) - vdtype = getattr(values, "dtype", None) - if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype): + if is_extension_array_dtype(values) or isinstance(dtype, ExtensionDtype): # GH#19157 if isinstance(values, np.ndarray) and values.ndim > 1: @@ -277,18 +274,9 @@ def ndarray_to_mgr( return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ) - if is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype): - # i.e. Datetime64TZ - values = extract_array(values, extract_numpy=True) - if copy: - values = values.copy() - if values.ndim == 1: - values = values.reshape(-1, 1) - - else: - # by definition an array here - # the dtypes will be coerced to a single dtype - values = _prep_ndarray(values, copy=copy) + # by definition an array here + # the dtypes will be coerced to a single dtype + values = _prep_ndarray(values, copy=copy) if dtype is not None and not is_dtype_equal(values.dtype, dtype): shape = values.shape @@ -332,6 +320,7 @@ def ndarray_to_mgr( dvals_list = [ensure_block_shape(dval, 2) for dval in dvals_list] # TODO: What about re-joining object columns? + dvals_list = [maybe_squeeze_dt64tz(x) for x in dvals_list] block_values = [ new_block(dvals_list[n], placement=n, ndim=2) for n in range(len(dvals_list)) @@ -339,10 +328,12 @@ def ndarray_to_mgr( else: datelike_vals = maybe_infer_to_datetimelike(values) + datelike_vals = maybe_squeeze_dt64tz(datelike_vals) nb = new_block(datelike_vals, placement=slice(len(columns)), ndim=2) block_values = [nb] else: - nb = new_block(values, placement=slice(len(columns)), ndim=2) + new_values = maybe_squeeze_dt64tz(values) + nb = new_block(new_values, placement=slice(len(columns)), ndim=2) block_values = [nb] if len(columns) == 0: @@ -369,6 +360,20 @@ def _check_values_indices_shape_match( raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") +def maybe_squeeze_dt64tz(dta: ArrayLike) -> ArrayLike: + """ + If we have a tzaware DatetimeArray with shape (1, N), squeeze to (N,) + """ + # TODO(EA2D): kludge not needed with 2D EAs + if isinstance(dta, DatetimeArray) and dta.ndim == 2 and dta.tz is not None: + assert dta.shape[0] == 1 + # error: Incompatible types in assignment (expression has type + # "Union[DatetimeLikeArrayMixin, Union[Any, NaTType]]", variable has + # type "Union[ExtensionArray, ndarray]") + dta = dta[0] # type: ignore[assignment] + return dta + + def dict_to_mgr( data: dict, index, @@ -391,6 +396,7 @@ def dict_to_mgr( arrays = Series(data, index=columns, dtype=object) data_names = arrays.index + missing = arrays.isna() if index is None: # GH10856 @@ -475,23 +481,13 @@ def treat_as_nested(data) -> bool: """ Check if we should use nested_data_to_arrays. """ - return ( - len(data) > 0 - and is_list_like(data[0]) - and getattr(data[0], "ndim", 1) == 1 - and not (isinstance(data, ExtensionArray) and data.ndim == 2) - ) + return len(data) > 0 and is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1 # --------------------------------------------------------------------- def _prep_ndarray(values, copy: bool = True) -> np.ndarray: - if isinstance(values, TimedeltaArray): - # On older numpy, np.asarray below apparently does not call __array__, - # so nanoseconds get dropped. 
- values = values._ndarray - if not isinstance(values, (np.ndarray, ABCSeries, Index)): if len(values) == 0: return np.empty((0, 0), dtype=object) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5db6592ba77f9..97d605e2fa2d1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -9,7 +9,6 @@ Hashable, Sequence, TypeVar, - cast, ) import warnings @@ -33,7 +32,6 @@ from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( ensure_platform_int, - is_1d_only_ea_dtype, is_dtype_equal, is_extension_array_dtype, is_list_like, @@ -49,7 +47,6 @@ ) import pandas.core.algorithms as algos -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.sparse import SparseDtype from pandas.core.construction import ( ensure_wrapped_if_datetimelike, @@ -1051,19 +1048,6 @@ def __init__( f"Number of Block dimensions ({block.ndim}) must equal " f"number of axes ({self.ndim})" ) - if isinstance(block, DatetimeTZBlock) and block.values.ndim == 1: - # TODO: remove once fastparquet no longer needs this - # error: Incompatible types in assignment (expression has type - # "Union[ExtensionArray, ndarray]", variable has type - # "DatetimeArray") - block.values = ensure_block_shape( # type: ignore[assignment] - block.values, self.ndim - ) - try: - block._cache.clear() - except AttributeError: - # _cache not initialized - pass self._verify_integrity() @@ -1165,8 +1149,7 @@ def iset(self, loc: int | slice | np.ndarray, value: ArrayLike): self._rebuild_blknos_and_blklocs() # Note: we exclude DTA/TDA here - vdtype = getattr(value, "dtype", None) - value_is_extension_type = is_1d_only_ea_dtype(vdtype) + value_is_extension_type = is_extension_array_dtype(value) # categorical/sparse/datetimetz if value_is_extension_type: @@ -1797,12 +1780,7 @@ def _form_blocks( if len(items_dict["DatetimeTZBlock"]): dttz_blocks = [ - new_block( - ensure_block_shape(extract_array(array), 2), - klass=DatetimeTZBlock, - placement=i, - ndim=2, - ) + new_block(array, klass=DatetimeTZBlock, placement=i, ndim=2) for i, array in items_dict["DatetimeTZBlock"] ] blocks.extend(dttz_blocks) @@ -1939,19 +1917,11 @@ def _merge_blocks( # TODO: optimization potential in case all mgrs contain slices and # combination of those slices is a slice, too. 
new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) - - new_values: ArrayLike - - if isinstance(blocks[0].dtype, np.dtype): - # error: List comprehension has incompatible type List[Union[ndarray, - # ExtensionArray]]; expected List[Union[complex, generic, - # Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], SupportsArray]] - new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] - else: - bvals = [blk.values for blk in blocks] - bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals) - new_values = bvals2[0]._concat_same_type(bvals2, axis=0) + # error: List comprehension has incompatible type List[Union[ndarray, + # ExtensionArray]]; expected List[Union[complex, generic, Sequence[Union[int, + # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], + # _SupportsArray]] + new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] argsort = np.argsort(new_mgr_locs) new_values = new_values[argsort] diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 1a4d8dbe2885e..d889e84cb9045 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -16,7 +16,6 @@ from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( ensure_platform_int, - is_1d_only_ea_dtype, is_bool_dtype, is_extension_array_dtype, is_integer, @@ -439,7 +438,7 @@ def unstack(obj, level, fill_value=None): f"index must be a MultiIndex to unstack, {type(obj.index)} was passed" ) else: - if is_1d_only_ea_dtype(obj.dtype): + if is_extension_array_dtype(obj.dtype): return _unstack_extension_series(obj, level, fill_value) unstacker = _Unstacker( obj.index, level=level, constructor=obj._constructor_expanddim diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index b9c1113e7f441..8e6c330475e68 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -226,16 +226,6 @@ def test_fillna_2d(self): res4 = dta2.fillna(method="backfill") tm.assert_extension_array_equal(res4, expected2) - # test the DataFrame method while we're here - df = pd.DataFrame(dta) - res = df.fillna(method="pad") - expected = pd.DataFrame(expected1) - tm.assert_frame_equal(res, expected) - - res = df.fillna(method="backfill") - expected = pd.DataFrame(expected2) - tm.assert_frame_equal(res, expected) - def test_array_interface_tz(self): tz = "US/Central" data = DatetimeArray(pd.date_range("2017", periods=2, tz=tz)) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 6e4ed7b77cad8..56c3f8216f033 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -3,10 +3,7 @@ import pandas as pd from pandas.api.extensions import ExtensionArray -from pandas.core.internals.blocks import ( - DatetimeTZBlock, - ExtensionBlock, -) +from pandas.core.internals import ExtensionBlock from pandas.tests.extension.base.base import BaseExtensionTests @@ -29,14 +26,14 @@ def test_series_constructor(self, data): assert result.dtype == data.dtype assert len(result) == len(data) if hasattr(result._mgr, "blocks"): - assert isinstance(result._mgr.blocks[0], (ExtensionBlock, DatetimeTZBlock)) + assert isinstance(result._mgr.blocks[0], ExtensionBlock) assert result._mgr.array is data # Series[EA] is unboxed / boxed correctly result2 = pd.Series(result) assert result2.dtype == data.dtype if hasattr(result._mgr, "blocks"): - assert 
isinstance(result2._mgr.blocks[0], (ExtensionBlock, DatetimeTZBlock)) + assert isinstance(result2._mgr.blocks[0], ExtensionBlock) def test_series_constructor_no_data_with_index(self, dtype, na_value): result = pd.Series(index=[1, 2, 3], dtype=dtype) @@ -71,7 +68,7 @@ def test_dataframe_constructor_from_dict(self, data, from_series): assert result.dtypes["A"] == data.dtype assert result.shape == (len(data), 1) if hasattr(result._mgr, "blocks"): - assert isinstance(result._mgr.blocks[0], (ExtensionBlock, DatetimeTZBlock)) + assert isinstance(result._mgr.blocks[0], ExtensionBlock) assert isinstance(result._mgr.arrays[0], ExtensionArray) def test_dataframe_from_series(self, data): @@ -79,7 +76,7 @@ def test_dataframe_from_series(self, data): assert result.dtypes[0] == data.dtype assert result.shape == (len(data), 1) if hasattr(result._mgr, "blocks"): - assert isinstance(result._mgr.blocks[0], (ExtensionBlock, DatetimeTZBlock)) + assert isinstance(result._mgr.blocks[0], ExtensionBlock) assert isinstance(result._mgr.arrays[0], ExtensionArray) def test_series_given_mismatched_index_raises(self, data): diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 62dc400f8de9f..430abd9700a23 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -96,18 +96,15 @@ def test_set_index_cast_datetimeindex(self): idf = df.set_index("A") assert isinstance(idf.index, DatetimeIndex) - def test_set_index_dst(self, using_array_manager): + def test_set_index_dst(self): di = date_range("2006-10-29 00:00:00", periods=3, freq="H", tz="US/Pacific") df = DataFrame(data={"a": [0, 1, 2], "b": [3, 4, 5]}, index=di).reset_index() # single level res = df.set_index("index") exp = DataFrame( - data={"a": [0, 1, 2], "b": [3, 4, 5]}, - index=Index(di, name="index"), + data={"a": [0, 1, 2], "b": [3, 4, 5]}, index=Index(di, name="index") ) - if not using_array_manager: - exp.index = exp.index._with_freq(None) tm.assert_frame_equal(res, exp) # GH#12920 diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index ba0acdc4f947b..748aa462cddae 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -45,7 +45,7 @@ def test_setitem_invalidates_datetime_index_freq(self): ts = dti[1] df = DataFrame({"B": dti}) - assert df["B"]._values.freq is None + assert df["B"]._values.freq == "D" df.iloc[1, 0] = pd.NaT assert df["B"]._values.freq is None diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 3299503dbc3a4..a1c5810ba8bb8 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -545,7 +545,7 @@ def test_astype(self, t): mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8") t = np.dtype(t) - with tm.assert_produces_warning(warn, check_stacklevel=False): + with tm.assert_produces_warning(warn): tmgr = mgr.astype(t, errors="ignore") assert tmgr.iget(2).dtype.type == t assert tmgr.iget(4).dtype.type == t @@ -618,10 +618,10 @@ def _compare(old_mgr, new_mgr): assert new_mgr.iget(8).dtype == np.float16 def test_invalid_ea_block(self): - with pytest.raises(ValueError, match="need to split"): + with pytest.raises(AssertionError, match="block.size != values.size"): create_mgr("a: category; b: category") - with pytest.raises(ValueError, match="need to split"): + with pytest.raises(AssertionError, match="block.size != 
values.size"): create_mgr("a: category2; b: category2") def test_interleave(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 67649e6e37b35..82961a42e4ff0 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1341,7 +1341,7 @@ def test_constructor_dtype_timedelta64(self): # td.astype('m8[%s]' % t) # valid astype - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): # astype(int64) deprecated td.astype("int64")