diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index e4aeb7ad69792..9bb5e10348e47 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -55,9 +55,7 @@ from pandas.core.dtypes.common import ( is_bool_dtype, is_object_dtype, is_datetime64_dtype, pandas_dtype, is_extension_array_dtype) -from pandas.core.arrays import Categorical from pandas.core.dtypes.concat import union_categoricals -import pandas.io.common as icom from pandas.compat import _import_lzma, _get_lzma_file from pandas.errors import (ParserError, DtypeWarning, @@ -1149,7 +1147,8 @@ cdef class TextReader: # Method accepts list of strings, not encoded ones. true_values = [x.decode() for x in self.true_values] - cat = Categorical._from_inferred_categories( + array_type = dtype.construct_array_type() + cat = array_type._from_inferred_categories( cats, codes, dtype, true_values=true_values) return cat, na_count diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9db9805e09b50..e6967630b97ac 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -29,7 +29,6 @@ is_bool_dtype, is_categorical_dtype, is_complex_dtype, - is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, is_extension_array_dtype, @@ -122,12 +121,7 @@ def _ensure_data(values, dtype=None): return ensure_object(values), "object" # datetimelike - if ( - needs_i8_conversion(values) - or is_period_dtype(dtype) - or is_datetime64_any_dtype(dtype) - or is_timedelta64_dtype(dtype) - ): + if needs_i8_conversion(values) or needs_i8_conversion(dtype): if is_period_dtype(values) or is_period_dtype(dtype): from pandas import PeriodIndex diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ece92acae6461..e12083dae5035 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -638,8 +638,6 @@ def astype(self, dtype, copy=True): # 1. PeriodArray.astype handles period -> period # 2. DatetimeArray.astype handles conversion between tz. # 3. DatetimeArray.astype handles datetime -> period - from pandas import Categorical - dtype = pandas_dtype(dtype) if is_object_dtype(dtype): @@ -667,7 +665,8 @@ def astype(self, dtype, copy=True): msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" raise TypeError(msg) elif is_categorical_dtype(dtype): - return Categorical(self, dtype=dtype) + arr_cls = dtype.construct_array_type() + return arr_cls(self, dtype=dtype) else: return np.asarray(self, dtype=dtype) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f777f52f56c9c..1316e15334fa6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -22,8 +22,8 @@ from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import ( - _INT64_DTYPE, DT64NS_DTYPE, + INT64_DTYPE, is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, @@ -404,7 +404,7 @@ def _generate_range( start = start.tz_localize(None) if end is not None: end = end.tz_localize(None) - # TODO: consider re-implementing _cached_range; GH#17914 + values, _tz = generate_regular_range(start, end, periods, freq) index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz)) @@ -1963,7 +1963,7 @@ def sequence_to_dt64ns( if tz: tz = timezones.maybe_get_tz(tz) - if data.dtype != _INT64_DTYPE: + if data.dtype != INT64_DTYPE: data = data.astype(np.int64, copy=False) result = data.view(DT64NS_DTYPE) diff --git a/pandas/core/base.py b/pandas/core/base.py index 5945d8a4b432d..ee514888c6331 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1143,8 +1143,7 @@ def _map_values(self, mapper, na_action=None): raise NotImplementedError map_f = lambda values, f: values.map(f) else: - values = self.astype(object) - values = getattr(values, "values", values) + values = self.astype(object)._values if na_action == "ignore": def map_f(values, f): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7dda6850ba4f7..df70e73c6aadb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -21,9 +21,9 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( - _INT64_DTYPE, _POSSIBLY_CAST_DTYPES, DT64NS_DTYPE, + INT64_DTYPE, TD64NS_DTYPE, ensure_int8, ensure_int16, @@ -954,7 +954,7 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) - if dtype not in [_INT64_DTYPE, TD64NS_DTYPE]: + if dtype not in [INT64_DTYPE, TD64NS_DTYPE]: # allow frequency conversions # we return a float here! diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5ea3ca09862c1..abfbe8d783325 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -60,7 +60,7 @@ DT64NS_DTYPE = conversion.DT64NS_DTYPE TD64NS_DTYPE = conversion.TD64NS_DTYPE -_INT64_DTYPE = np.dtype(np.int64) +INT64_DTYPE = np.dtype(np.int64) # oh the troubles to reduce import time _is_scipy_sparse = None @@ -68,9 +68,6 @@ ensure_float64 = algos.ensure_float64 ensure_float32 = algos.ensure_float32 -_ensure_datetime64ns = conversion.ensure_datetime64ns -_ensure_timedelta64ns = conversion.ensure_timedelta64ns - def ensure_float(arr): """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2878204f5ee79..25a3b14120537 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3548,8 +3548,6 @@ class animal locomotion result._set_is_copy(self, copy=not result._is_view) return result - _xs: Callable = xs - def __getitem__(self, item): raise AbstractMethodError(self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index dd072cf00ed20..d100cb0bb70d8 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1046,7 +1046,7 @@ def _getitem_tuple(self, tup: Tuple): def _get_label(self, label, axis: int): # GH#5667 this will fail if the label is not present in the axis. - return self.obj._xs(label, axis=axis) + return self.obj.xs(label, axis=axis) def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): # we have an axis0 multi-index, handle or raise diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1bcbcb61ddde4..71efde1cc5380 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -585,8 +585,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): newb = self.copy() if copy else self return newb - # TODO(extension) - # should we make this attribute? + # TODO(EA2D): special case not needed with 2D EAs if isinstance(values, np.ndarray): values = values.reshape(self.shape) @@ -1554,6 +1553,7 @@ def __init__(self, values, placement, ndim=None): @property def shape(self): + # TODO(EA2D): override unnecessary with 2D EAs if self.ndim == 1: return ((len(self.values)),) return (len(self.mgr_locs), len(self.values)) @@ -1561,6 +1561,7 @@ def shape(self): def iget(self, col): if self.ndim == 2 and isinstance(col, tuple): + # TODO(EA2D): unnecessary with 2D EAs col, loc = col if not com.is_null_slice(col) and col != 0: raise IndexError(f"{self} only contains one item") @@ -1669,6 +1670,7 @@ def setitem(self, indexer, value): be a compatible shape. """ if isinstance(indexer, tuple): + # TODO(EA2D): not needed with 2D EAs # we are always 1-D indexer = indexer[0] @@ -1678,6 +1680,7 @@ def setitem(self, indexer, value): def get_values(self, dtype=None): # ExtensionArrays must be iterable, so this works. + # TODO(EA2D): reshape not needed with 2D EAs return np.asarray(self.values).reshape(self.shape) def array_values(self) -> ExtensionArray: @@ -1691,6 +1694,7 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs): values = np.asarray(values.astype(object)) values[mask] = na_rep + # TODO(EA2D): reshape not needed with 2D EAs # we are expected to return a 2-d ndarray return values.reshape(1, len(values)) @@ -1703,6 +1707,7 @@ def take_nd( if fill_value is lib.no_default: fill_value = None + # TODO(EA2D): special case not needed with 2D EAs # axis doesn't matter; we are really a single-dim object # but are passed the axis depending on the calling routing # if its REALLY axis 0, then this will be a reindex and not a take @@ -2229,6 +2234,7 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]: by apply. """ if axis == 0: + # TODO(EA2D): special case not needed with 2D EAs # Cannot currently calculate diff across multiple blocks since this # function is invoked via apply raise NotImplementedError @@ -2280,7 +2286,7 @@ def quantile(self, qs, interpolation="linear", axis=0): blk = self.make_block(naive) res_blk = blk.quantile(qs, interpolation=interpolation, axis=axis) - # ravel is kludge for 2D block with 1D values, assumes column-like + # TODO(EA2D): ravel is kludge for 2D block with 1D values, assumes column-like aware = self._holder(res_blk.values.ravel(), dtype=self.dtype) return self.make_block_same_class(aware, ndim=res_blk.ndim) @@ -2693,6 +2699,7 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None): if isinstance(values, ABCPandasArray): values = values.to_numpy() if ndim and ndim > 1: + # TODO(EA2D): special case not needed with 2D EAs values = np.atleast_2d(values) if isinstance(dtype, PandasDtype): @@ -2759,6 +2766,7 @@ def _safe_reshape(arr, new_shape): if isinstance(arr, ABCSeries): arr = arr._values if not isinstance(arr, ABCExtensionArray): + # TODO(EA2D): special case not needed with 2D EAs arr = arr.reshape(new_shape) return arr diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index c8f4ec14545c7..743dd6db348b4 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -251,6 +251,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): ): if self.block is None: array = empty_dtype.construct_array_type() + # TODO(EA2D): special case unneeded with 2D EAs return array( np.full(self.shape[1], fill_value.value), dtype=empty_dtype )