diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index adb920e0cca6d..94ad2aa3a751f 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -51,10 +51,12 @@ from pandas._libs.khash cimport ( kh_resize_int64, khiter_t, ) +from pandas._libs.missing cimport ( + checknull, + isnaobj, +) from pandas._libs.util cimport get_nat -import pandas._libs.missing as missing - cdef: float64_t FP_ERR = 1e-13 float64_t NaN = np.NaN @@ -95,10 +97,10 @@ class Infinity: def __gt__(self, other): return (not isinstance(other, Infinity) and - not missing.checknull(other)) + not checknull(other)) def __ge__(self, other): - return not missing.checknull(other) + return not checknull(other) class NegInfinity: @@ -107,10 +109,10 @@ class NegInfinity: """ def __lt__(self, other): return (not isinstance(other, NegInfinity) and - not missing.checknull(other)) + not checknull(other)) def __le__(self, other): - return not missing.checknull(other) + return not checknull(other) def __eq__(self, other): return isinstance(other, NegInfinity) @@ -988,7 +990,7 @@ def rank_1d( if mask is not None: pass elif numeric_object_t is object: - mask = missing.isnaobj(masked_vals) + mask = isnaobj(masked_vals) elif numeric_object_t is int64_t and is_datetimelike: mask = (masked_vals == NPY_NAT).astype(np.uint8) elif numeric_object_t is float64_t or numeric_object_t is float32_t: @@ -1366,7 +1368,7 @@ def rank_2d( nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, 0) if numeric_object_t is object: - mask = missing.isnaobj(values).view(np.uint8) + mask = isnaobj(values).view(np.uint8) elif numeric_object_t is float64_t or numeric_object_t is float32_t: mask = np.isnan(values).view(np.uint8) else: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index a95e92923cd00..f438ddbf3de1f 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -49,7 +49,6 @@ from pandas._libs.missing cimport checknull cdef int64_t NPY_NAT = util.get_nat() -_int64_max = np.iinfo(np.int64).max cdef float64_t NaN = np.NaN @@ -256,9 +255,9 @@ def group_cumprod( Always false, `values` is never datetime-like. skipna : bool If true, ignore nans in `values`. - mask: np.ndarray[uint8], optional + mask : np.ndarray[uint8], optional Mask of values - result_mask: np.ndarray[int8], optional + result_mask : np.ndarray[int8], optional Mask of out array Notes @@ -345,9 +344,9 @@ def group_cumsum( True if `values` contains datetime-like entries. skipna : bool If true, ignore nans in `values`. - mask: np.ndarray[uint8], optional + mask : np.ndarray[uint8], optional Mask of values - result_mask: np.ndarray[int8], optional + result_mask : np.ndarray[int8], optional Mask of out array Notes @@ -615,7 +614,7 @@ def group_any_all( # value encountered is True flag_val = 1 else: - raise ValueError("'bool_func' must be either 'any' or 'all'!") + raise ValueError("'val_test' must be either 'any' or 'all'!") out[:] = 1 - flag_val @@ -1036,7 +1035,7 @@ def group_ohlc( raise NotImplementedError("Argument 'values' must have only one dimension") if int64float_t is float32_t or int64float_t is float64_t: - out[:] = np.nan + out[:] = NAN else: out[:] = 0 diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c6aded1b25281..616a9bddc24ac 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2428,7 +2428,7 @@ def maybe_convert_objects(ndarray[object] objects, Seen seen = Seen() object val _TSObject tsobj - float64_t fnan = np.nan + float64_t fnan = NaN if dtype_if_all_nat is not None: # in practice we don't expect to ever pass dtype_if_all_nat diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index 946ba5ddaa248..c669bf349162d 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -9,6 +9,7 @@ from numpy cimport ( import numpy as np cimport numpy as cnp +from numpy.math cimport NAN cnp.import_array() @@ -129,7 +130,7 @@ def explode(ndarray[object] values): count += 1 else: # empty list-like, use a nan marker - result[count] = np.nan + result[count] = NAN count += 1 else: # replace with the existing scalar diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 74f7653ebbe0c..0918ecc977a3a 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -1,4 +1,5 @@ cimport cython + import numpy as np cimport numpy as cnp @@ -10,16 +11,14 @@ from numpy cimport ( ndarray, uint8_t, ) +from numpy.math cimport ( + INFINITY as INF, + NAN as NaN, +) cnp.import_array() -# ----------------------------------------------------------------------------- -# Preamble stuff - -cdef float64_t NaN = np.NaN -cdef float64_t INF = np.inf - # ----------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 3873e0c848145..b162f278fcbec 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -150,12 +150,13 @@ def get_date_name_field( name based on requested field (e.g. day_name) """ cdef: - Py_ssize_t i, count = dtindex.shape[0] + Py_ssize_t i + cnp.npy_intp count = dtindex.shape[0] ndarray[object] out, names npy_datetimestruct dts int dow - out = np.empty(count, dtype=object) + out = cnp.PyArray_EMPTY(1, &count, cnp.NPY_OBJECT, 0) if field == "day_name": if locale is None: diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 6105f96a3b1b8..4c4e3dfa4bf76 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -270,11 +270,12 @@ cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz): cdef int64_t[::1] unbox_utcoffsets(object transinfo): cdef: - Py_ssize_t i, sz + Py_ssize_t i + cnp.npy_intp sz int64_t[::1] arr sz = len(transinfo) - arr = np.empty(sz, dtype="i8") + arr = cnp.PyArray_EMPTY(1, &sz, cnp.NPY_INT64, 0) for i in range(sz): arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000 diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index f424b74c6e577..0a19092f57706 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -1,14 +1,11 @@ cimport cython +cimport numpy as cnp from cpython.datetime cimport ( date, datetime, time, tzinfo, ) - -import numpy as np - -cimport numpy as cnp from numpy cimport ( int64_t, ndarray, @@ -101,7 +98,7 @@ def ints_to_pydatetime( tzinfo tz=None, str box="datetime", NPY_DATETIMEUNIT reso=NPY_FR_ns, -) -> np.ndarray: +) -> ndarray: # stamps is int64, arbitrary ndim """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 334400cc13201..2e82cabc76994 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1078,7 +1078,9 @@ def _add_datetime_arraylike(self, other: DatetimeArray) -> DatetimeArray: return other + self @final - def _sub_datetimelike_scalar(self, other: datetime | np.datetime64): + def _sub_datetimelike_scalar( + self, other: datetime | np.datetime64 + ) -> TimedeltaArray: if self.dtype.kind != "M": raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}") @@ -1095,7 +1097,7 @@ def _sub_datetimelike_scalar(self, other: datetime | np.datetime64): return self._sub_datetimelike(ts) @final - def _sub_datetime_arraylike(self, other: DatetimeArray): + def _sub_datetime_arraylike(self, other: DatetimeArray) -> TimedeltaArray: if self.dtype.kind != "M": raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}") @@ -1296,7 +1298,7 @@ def _addsub_object_array(self, other: npt.NDArray[np.object_], op): res_values = op(self.astype("O"), np.asarray(other)) return res_values - def _accumulate(self, name: str, *, skipna: bool = True, **kwargs): + def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> Self: if name not in {"cummin", "cummax"}: raise TypeError(f"Accumulation {name} not supported for {type(self)}") @@ -2015,7 +2017,7 @@ def round( freq, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", - ): + ) -> Self: return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent) @Appender((_round_doc + _floor_example).format(op="floor")) @@ -2024,7 +2026,7 @@ def floor( freq, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", - ): + ) -> Self: return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) @Appender((_round_doc + _ceil_example).format(op="ceil")) @@ -2033,7 +2035,7 @@ def ceil( freq, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", - ): + ) -> Self: return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) # -------------------------------------------------------------- @@ -2054,7 +2056,7 @@ def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool: def _maybe_clear_freq(self) -> None: self._freq = None - def _with_freq(self, freq): + def _with_freq(self, freq) -> Self: """ Helper to get a view on the same data, with a new freq. diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 4bd95da2b6b07..2508bad80dc26 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -353,7 +353,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal else: if hasattr(scalars, "type"): - # pyarrow array + # pyarrow array; we cannot rely on the "to_numpy" check in + # ensure_string_array because calling scalars.to_numpy would set + # zero_copy_only to True which caused problems see GH#52076 scalars = np.array(scalars) # convert non-na-likes to str, and nan-likes to StringDtype().na_value result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c81ebc06ba753..9a74da33db531 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -112,7 +112,6 @@ _int8_max = np.iinfo(np.int8).max _int16_max = np.iinfo(np.int16).max _int32_max = np.iinfo(np.int32).max -_int64_max = np.iinfo(np.int64).max _dtype_obj = np.dtype(object) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bcba7c8c13f8c..0ff7161ca0459 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -865,6 +865,7 @@ def __init__( NDFrame.__init__(self, mgr) # ---------------------------------------------------------------------- + def __dataframe__( self, nan_as_null: bool = False, allow_copy: bool = True ) -> DataFrameXchg: @@ -1029,16 +1030,10 @@ def _repr_fits_vertical_(self) -> bool: max_rows = get_option("display.max_rows") return len(self) <= max_rows - def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: + def _repr_fits_horizontal_(self) -> bool: """ Check if full repr fits in horizontal boundaries imposed by the display options width and max_columns. - - In case of non-interactive session, no boundaries apply. - - `ignore_width` is here so ipynb+HTML output can behave the way - users expect. display.max_columns remains in effect. - GH3541, GH3573 """ width, height = console.get_console_size() max_columns = get_option("display.max_columns") @@ -1046,13 +1041,13 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: # exceed max columns if (max_columns and nb_columns > max_columns) or ( - (not ignore_width) and width and nb_columns > (width // 2) + width and nb_columns > (width // 2) ): return False # used by repr_html under IPython notebook or scripts ignore terminal # dims - if ignore_width or width is None or not console.in_interactive_session(): + if width is None or not console.in_interactive_session(): return True if get_option("display.width") is not None or console.in_ipython_frontend(): @@ -4928,65 +4923,6 @@ def _series(self): # ---------------------------------------------------------------------- # Reindexing and alignment - def _reindex_axes( - self, axes, level, limit: int | None, tolerance, method, fill_value, copy - ): - frame = self - - columns = axes["columns"] - if columns is not None: - frame = frame._reindex_columns( - columns, method, copy, level, fill_value, limit, tolerance - ) - - index = axes["index"] - if index is not None: - frame = frame._reindex_index( - index, method, copy, level, fill_value, limit, tolerance - ) - - return frame - - def _reindex_index( - self, - new_index, - method, - copy: bool, - level: Level, - fill_value=np.nan, - limit: int | None = None, - tolerance=None, - ): - new_index, indexer = self.index.reindex( - new_index, method=method, level=level, limit=limit, tolerance=tolerance - ) - return self._reindex_with_indexers( - {0: [new_index, indexer]}, - copy=copy, - fill_value=fill_value, - allow_dups=False, - ) - - def _reindex_columns( - self, - new_columns, - method, - copy: bool, - level: Level, - fill_value=None, - limit: int | None = None, - tolerance=None, - ): - new_columns, indexer = self.columns.reindex( - new_columns, method=method, level=level, limit=limit, tolerance=tolerance - ) - return self._reindex_with_indexers( - {1: [new_columns, indexer]}, - copy=copy, - fill_value=fill_value, - allow_dups=False, - ) - def _reindex_multi( self, axes: dict[str, Index], copy: bool, fill_value ) -> DataFrame: @@ -7502,7 +7438,9 @@ def _arith_method(self, other, op): _logical_method = _arith_method - def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None): + def _dispatch_frame_op( + self, right, func: Callable, axis: AxisInt | None = None + ) -> DataFrame: """ Evaluate the frame operation func(left, right) by evaluating column-by-column, dispatching to the Series implementation. @@ -7667,7 +7605,7 @@ def _should_reindex_frame_op(self, right, op, axis: int, fill_value, level) -> b return False def _align_for_op( - self, other, axis, flex: bool | None = False, level: Level = None + self, other, axis: AxisInt, flex: bool | None = False, level: Level = None ): """ Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. @@ -7676,7 +7614,7 @@ def _align_for_op( ---------- left : DataFrame right : Any - axis : int, str, or None + axis : int flex : bool or None, default False Whether this is a flex op, in which case we reindex. None indicates not to check for alignment. @@ -7703,7 +7641,7 @@ def to_series(right): # datetime64[h] ndarray dtype = object - if axis is not None and left._get_axis_number(axis) == 0: + if axis == 0: if len(left.index) != len(right): raise ValueError( msg.format(req_len=len(left.index), given_len=len(right)) @@ -7780,8 +7718,6 @@ def to_series(right): ) elif isinstance(right, Series): # axis=1 is default for DataFrame-with-Series op - axis = left._get_axis_number(axis) if axis is not None else 1 - if not flex: if not left.axes[axis].equals(right.index): raise ValueError( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 72fd7fadd0987..02a2a202939c0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -242,10 +242,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin): "_item_cache", "_cache", "_is_copy", - "_subtyp", "_name", - "_default_kind", - "_default_fill_value", "_metadata", "__array_struct__", "__array_interface__", @@ -281,6 +278,7 @@ def __init__( object.__setattr__(self, "_attrs", attrs) object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) + @final @classmethod def _init_mgr( cls, @@ -622,6 +620,7 @@ def axes(self) -> list[Index]: # the block manager shows then reversed return [self._get_axis(a) for a in self._AXIS_ORDERS] + @final @property def ndim(self) -> int: """ @@ -645,6 +644,7 @@ def ndim(self) -> int: """ return self._mgr.ndim + @final @property def size(self) -> int: """ @@ -4673,7 +4673,7 @@ def _drop_axis( ) result = self._constructor(new_mgr) if self.ndim == 1: - result.name = self.name + result._name = self.name return result.__finalize__(self) @@ -5397,8 +5397,16 @@ def reindex( axes, level, limit, tolerance, method, fill_value, copy ).__finalize__(self, method="reindex") + @final def _reindex_axes( - self, axes, level, limit, tolerance, method, fill_value, copy + self, + axes, + level: Level | None, + limit: int | None, + tolerance, + method, + fill_value: Scalar | None, + copy: bool_t | None, ) -> Self: """Perform the reindex for all the axes.""" obj = self @@ -5424,7 +5432,7 @@ def _reindex_axes( return obj - def _needs_reindex_multi(self, axes, method, level) -> bool_t: + def _needs_reindex_multi(self, axes, method, level: Level | None) -> bool_t: """Check if we do need a multi reindex.""" return ( (common.count_not_none(*axes.values()) == self._AXIS_LEN) @@ -9527,6 +9535,7 @@ def align( if broadcast_axis is not lib.no_default: # GH#51856 + # TODO(3.0): enforcing this deprecation will close GH#13194 msg = ( f"The 'broadcast_axis' keyword in {type(self).__name__}.align is " "deprecated and will be removed in a future version." diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index dad188e2d9304..6f361ff867c35 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -234,7 +234,7 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray: return values # TODO: general case implementation overridable by EAs. - def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): + def _disallow_invalid_ops(self, dtype: DtypeObj): """ Check if we can do this operation with our cython functions. @@ -247,7 +247,7 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): """ how = self.how - if is_numeric: + if is_numeric_dtype(dtype): # never an invalid op for those dtypes, so return early as fastpath return @@ -711,12 +711,9 @@ def cython_operation( # as we can have 1D ExtensionArrays that we need to treat as 2D assert axis == 0 - dtype = values.dtype - is_numeric = is_numeric_dtype(dtype) - # can we do this operation with our cython functions # if not raise NotImplementedError - self._disallow_invalid_ops(dtype, is_numeric) + self._disallow_invalid_ops(values.dtype) if not isinstance(values, np.ndarray): # i.e. ExtensionArray diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index eb79278eb35d9..81beddac7c432 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -936,7 +936,7 @@ def dtype(self) -> DtypeObj: return self._data.dtype @final - def ravel(self, order: str_t = "C") -> Index: + def ravel(self, order: str_t = "C") -> Self: """ Return a view on self. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1133ea6be26ac..eacf979dde3fc 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -280,7 +280,7 @@ def _partial_date_slice( self, reso: Resolution, parsed: datetime, - ): + ) -> slice | npt.NDArray[np.intp]: """ Parameters ---------- @@ -488,10 +488,10 @@ def _as_range_index(self) -> RangeIndex: rng = range(self[0]._value, self[-1]._value + tick, tick) return RangeIndex(rng) - def _can_range_setop(self, other): + def _can_range_setop(self, other) -> bool: return isinstance(self.freq, Tick) and isinstance(other.freq, Tick) - def _wrap_range_setop(self, other, res_i8): + def _wrap_range_setop(self, other, res_i8) -> Self: new_freq = None if not len(res_i8): # RangeIndex defaults to step=1, which we don't want. @@ -508,16 +508,16 @@ def _wrap_range_setop(self, other, res_i8): result = type(self._data)._simple_new( res_values, dtype=self.dtype, freq=new_freq ) - return self._wrap_setop_result(other, result) + return cast("Self", self._wrap_setop_result(other, result)) - def _range_intersect(self, other, sort): + def _range_intersect(self, other, sort) -> Self: # Dispatch to RangeIndex intersection logic. left = self._as_range_index right = other._as_range_index res_i8 = left.intersection(right, sort=sort) return self._wrap_range_setop(other, res_i8) - def _range_union(self, other, sort): + def _range_union(self, other, sort) -> Self: # Dispatch to RangeIndex union logic. left = self._as_range_index right = other._as_range_index @@ -747,7 +747,7 @@ def _get_insert_freq(self, loc: int, item): return freq @doc(NDArrayBackedExtensionIndex.delete) - def delete(self, loc) -> DatetimeTimedeltaMixin: + def delete(self, loc) -> Self: result = super().delete(loc) result._data._freq = self._get_delete_freq(loc) return result @@ -771,7 +771,7 @@ def take( allow_fill: bool = True, fill_value=None, **kwargs, - ): + ) -> Self: nv.validate_take((), kwargs) indices = np.asarray(indices, dtype=np.intp) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9690806afb173..991195cafccfc 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -57,6 +57,7 @@ DtypeObj, Frequency, IntervalClosedType, + Self, TimeAmbiguous, TimeNonexistent, npt, @@ -266,7 +267,7 @@ def strftime(self, date_format) -> Index: return Index(arr, name=self.name, dtype=object) @doc(DatetimeArray.tz_convert) - def tz_convert(self, tz) -> DatetimeIndex: + def tz_convert(self, tz) -> Self: arr = self._data.tz_convert(tz) return type(self)._simple_new(arr, name=self.name, refs=self._references) @@ -276,7 +277,7 @@ def tz_localize( tz, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", - ) -> DatetimeIndex: + ) -> Self: arr = self._data.tz_localize(tz, ambiguous, nonexistent) return type(self)._simple_new(arr, name=self.name) @@ -317,7 +318,7 @@ def __new__( dtype: Dtype | None = None, copy: bool = False, name: Hashable = None, - ) -> DatetimeIndex: + ) -> Self: if is_scalar(data): cls._raise_scalar_data_error(data) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index eae70d50e7f95..2fa2b7f54639d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -49,6 +49,7 @@ from pandas._typing import ( Dtype, DtypeObj, + Self, npt, ) _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -175,7 +176,7 @@ def _resolution_obj(self) -> Resolution: other_name="PeriodArray", **_shared_doc_kwargs, ) - def asfreq(self, freq=None, how: str = "E") -> PeriodIndex: + def asfreq(self, freq=None, how: str = "E") -> Self: arr = self._data.asfreq(freq, how) return type(self)._simple_new(arr, name=self.name) @@ -211,7 +212,7 @@ def __new__( copy: bool = False, name: Hashable = None, **fields, - ) -> PeriodIndex: + ) -> Self: valid_field_set = { "year", "month", @@ -272,7 +273,7 @@ def __new__( # Data @property - def values(self) -> np.ndarray: + def values(self) -> npt.NDArray[np.object_]: return np.asarray(self, dtype=object) def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]: @@ -476,7 +477,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end")) @doc(DatetimeIndexOpsMixin.shift) - def shift(self, periods: int = 1, freq=None): + def shift(self, periods: int = 1, freq=None) -> Self: if freq is not None: raise TypeError( f"`freq` argument is not supported for {type(self).__name__}.shift" diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 66c5a12549f23..9b3309706e12b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -51,6 +51,7 @@ if TYPE_CHECKING: from pandas._typing import ( Dtype, + Self, npt, ) _empty_range = range(0) @@ -144,9 +145,7 @@ def __new__( return cls._simple_new(rng, name=name) @classmethod - def from_range( - cls, data: range, name=None, dtype: Dtype | None = None - ) -> RangeIndex: + def from_range(cls, data: range, name=None, dtype: Dtype | None = None) -> Self: """ Create RangeIndex from a range object. @@ -168,7 +167,7 @@ def from_range( @classmethod def _simple_new( # type: ignore[override] cls, values: range, name: Hashable = None - ) -> RangeIndex: + ) -> Self: result = object.__new__(cls) assert isinstance(values, range) @@ -216,7 +215,7 @@ def _get_data_as_items(self): return [("start", rng.start), ("stop", rng.stop), ("step", rng.step)] def __reduce__(self): - d = {"name": self.name} + d = {"name": self._name} d.update(dict(self._get_data_as_items())) return ibase._new_Index, (type(self), d), None @@ -228,8 +227,8 @@ def _format_attrs(self): Return a list of tuples of the (attr, formatted_value) """ attrs = self._get_data_as_items() - if self.name is not None: - attrs.append(("name", ibase.default_pprint(self.name))) + if self._name is not None: + attrs.append(("name", ibase.default_pprint(self._name))) return attrs def _format_data(self, name=None): @@ -398,7 +397,7 @@ def __iter__(self) -> Iterator[int]: @doc(Index._shallow_copy) def _shallow_copy(self, values, name: Hashable = no_default): - name = self.name if name is no_default else name + name = self._name if name is no_default else name if values.dtype.kind == "f": return Index(values, name=name, dtype=np.float64) @@ -412,13 +411,13 @@ def _shallow_copy(self, values, name: Hashable = no_default): else: return self._constructor._simple_new(values, name=name) - def _view(self: RangeIndex) -> RangeIndex: + def _view(self) -> Self: result = type(self)._simple_new(self._range, name=self._name) result._cache = self._cache return result @doc(Index.copy) - def copy(self, name: Hashable = None, deep: bool = False): + def copy(self, name: Hashable = None, deep: bool = False) -> Self: name = self._validate_names(name=name, deep=deep)[0] new_index = self._rename(name=name) return new_index @@ -814,17 +813,17 @@ def insert(self, loc: int, item) -> Index: rng = self._range if loc == 0 and item == self[0] - self.step: new_rng = range(rng.start - rng.step, rng.stop, rng.step) - return type(self)._simple_new(new_rng, name=self.name) + return type(self)._simple_new(new_rng, name=self._name) elif loc == len(self) and item == self[-1] + self.step: new_rng = range(rng.start, rng.stop + rng.step, rng.step) - return type(self)._simple_new(new_rng, name=self.name) + return type(self)._simple_new(new_rng, name=self._name) elif len(self) == 2 and item == self[0] + self.step / 2: # e.g. inserting 1 into [0, 2] step = int(self.step / 2) new_rng = range(self.start, self.stop, step) - return type(self)._simple_new(new_rng, name=self.name) + return type(self)._simple_new(new_rng, name=self._name) return super().insert(loc, item) @@ -922,7 +921,7 @@ def __getitem__(self, key): ) return super().__getitem__(key) - def _getitem_slice(self: RangeIndex, slobj: slice) -> RangeIndex: + def _getitem_slice(self, slobj: slice) -> Self: """ Fastpath for __getitem__ when we know we have a slice. """ @@ -937,11 +936,11 @@ def __floordiv__(self, other): step = self.step // other stop = start + len(self) * step new_range = range(start, stop, step or 1) - return self._simple_new(new_range, name=self.name) + return self._simple_new(new_range, name=self._name) if len(self) == 1: start = self.start // other new_range = range(start, start + 1, 1) - return self._simple_new(new_range, name=self.name) + return self._simple_new(new_range, name=self._name) return super().__floordiv__(other) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 407e16e1fa187..d72266346e654 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -167,7 +167,7 @@ def set_axis(self, axis: AxisInt, new_labels: Index) -> None: axis = self._normalize_axis(axis) self._axes[axis] = new_labels - def get_dtypes(self) -> np.ndarray: + def get_dtypes(self) -> npt.NDArray[np.object_]: return np.array([arr.dtype for arr in self.arrays], dtype="object") def add_references(self, mgr: BaseArrayManager) -> None: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f48b044ff0016..6dcb73f6793ad 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2173,7 +2173,7 @@ def shift( def _catch_deprecated_value_error(err: Exception) -> None: """ We catch ValueError for now, but only a specific one raised by DatetimeArray - which will no longer be raised in version.2.0. + which will no longer be raised in version 2.0. """ if isinstance(err, ValueError): if isinstance(err, IncompatibleFrequency): @@ -2431,8 +2431,8 @@ def check_ndim(values, placement: BlockPlacement, ndim: int) -> None: def extract_pandas_array( - values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int -) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]: + values: ArrayLike, dtype: DtypeObj | None, ndim: int +) -> tuple[ArrayLike, DtypeObj | None]: """ Ensure that we don't allow PandasArray / PandasDtype in internals. """ @@ -2492,7 +2492,7 @@ def to_native_types( float_format=None, decimal: str = ".", **kwargs, -) -> np.ndarray: +) -> npt.NDArray[np.object_]: """convert to our native types format""" if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm": # GH#40754 Convert categorical datetimes to datetime array diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cb644c8329179..ed1a9b193b3e4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -273,8 +273,8 @@ def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool: ref = weakref.ref(self.blocks[blkno]) return ref in mgr.blocks[blkno].refs.referenced_blocks - def get_dtypes(self): - dtypes = np.array([blk.dtype for blk in self.blocks]) + def get_dtypes(self) -> npt.NDArray[np.object_]: + dtypes = np.array([blk.dtype for blk in self.blocks], dtype=object) return dtypes.take(self.blknos) @property @@ -2010,8 +2010,8 @@ def index(self) -> Index: def dtype(self) -> DtypeObj: return self._block.dtype - def get_dtypes(self) -> np.ndarray: - return np.array([self._block.dtype]) + def get_dtypes(self) -> npt.NDArray[np.object_]: + return np.array([self._block.dtype], dtype=object) def external_values(self): """The array that Series.values returns""" diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 2fa059178d238..e9f1eaabbe246 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -11,7 +11,6 @@ ) from typing import ( TYPE_CHECKING, - Any, Callable, Hashable, Sequence, @@ -77,7 +76,7 @@ def describe_ndframe( ------- Dataframe or series description. """ - percentiles = refine_percentiles(percentiles) + percentiles = _refine_percentiles(percentiles) describer: NDFrameDescriberAbstract if obj.ndim == 1: @@ -175,7 +174,7 @@ def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame: d.columns = data.columns.copy() return d - def _select_data(self): + def _select_data(self) -> DataFrame: """Select columns to be described.""" if (self.include is None) and (self.exclude is None): # when some numerics are found, keep only numerics @@ -193,7 +192,7 @@ def _select_data(self): include=self.include, exclude=self.exclude, ) - return data + return data # pyright: ignore def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]: @@ -229,9 +228,9 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: ) # GH#48340 - always return float on non-complex numeric data dtype: DtypeObj | None - if is_extension_array_dtype(series): + if is_extension_array_dtype(series.dtype): dtype = Float64Dtype() - elif is_numeric_dtype(series) and not is_complex_dtype(series): + elif is_numeric_dtype(series.dtype) and not is_complex_dtype(series.dtype): dtype = np.dtype("float") else: dtype = None @@ -364,9 +363,9 @@ def select_describe_func( return describe_categorical_1d -def refine_percentiles( +def _refine_percentiles( percentiles: Sequence[float] | np.ndarray | None, -) -> np.ndarray[Any, np.dtype[np.float64]]: +) -> npt.NDArray[np.float64]: """ Ensure that percentiles are unique and sorted. diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index eaeef4d00e23e..33019269aa3b7 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -420,7 +420,7 @@ def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype): [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)], ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"], ) -def test_maybe_promote_any_with_timedelta64(any_numpy_dtype, fill_value, request): +def test_maybe_promote_any_with_timedelta64(any_numpy_dtype, fill_value): dtype = np.dtype(any_numpy_dtype) # filling anything but timedelta with timedelta casts to object diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 40c8e4fa27f90..090b3d64e7c41 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1803,12 +1803,12 @@ def test_alignment_non_pandas(self, val): align = DataFrame._align_for_op expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index) - tm.assert_frame_equal(align(df, val, "index")[1], expected) + tm.assert_frame_equal(align(df, val, axis=0)[1], expected) expected = DataFrame( {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index ) - tm.assert_frame_equal(align(df, val, "columns")[1], expected) + tm.assert_frame_equal(align(df, val, axis=1)[1], expected) @pytest.mark.parametrize("val", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]) def test_alignment_non_pandas_length_mismatch(self, val): @@ -1820,10 +1820,10 @@ def test_alignment_non_pandas_length_mismatch(self, val): # length mismatch msg = "Unable to coerce to Series, length must be 3: given 2" with pytest.raises(ValueError, match=msg): - align(df, val, "index") + align(df, val, axis=0) with pytest.raises(ValueError, match=msg): - align(df, val, "columns") + align(df, val, axis=1) def test_alignment_non_pandas_index_columns(self): index = ["A", "B", "C"] @@ -1833,11 +1833,11 @@ def test_alignment_non_pandas_index_columns(self): align = DataFrame._align_for_op val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) tm.assert_frame_equal( - align(df, val, "index")[1], + align(df, val, axis=0)[1], DataFrame(val, index=df.index, columns=df.columns), ) tm.assert_frame_equal( - align(df, val, "columns")[1], + align(df, val, axis=1)[1], DataFrame(val, index=df.index, columns=df.columns), ) @@ -1845,19 +1845,19 @@ def test_alignment_non_pandas_index_columns(self): msg = "Unable to coerce to DataFrame, shape must be" val = np.array([[1, 2, 3], [4, 5, 6]]) with pytest.raises(ValueError, match=msg): - align(df, val, "index") + align(df, val, axis=0) with pytest.raises(ValueError, match=msg): - align(df, val, "columns") + align(df, val, axis=1) val = np.zeros((3, 3, 3)) msg = re.escape( "Unable to coerce to Series/DataFrame, dimension must be <= 2: (3, 3, 3)" ) with pytest.raises(ValueError, match=msg): - align(df, val, "index") + align(df, val, axis=0) with pytest.raises(ValueError, match=msg): - align(df, val, "columns") + align(df, val, axis=1) def test_no_warning(self, all_arithmetic_operators): df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index aae51ebc5a017..71ce8541de24b 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -438,7 +438,7 @@ def test_astype_ea_to_datetimetzdtype(self, dtype): tm.assert_series_equal(result, expected) - def test_astype_retain_Attrs(self, any_numpy_dtype): + def test_astype_retain_attrs(self, any_numpy_dtype): # GH#44414 ser = Series([0, 1, 2, 3]) ser.attrs["Location"] = "Michigan" diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 2c427399c9cd5..fcadb07a13b83 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( NA, Categorical, @@ -300,13 +302,11 @@ def test_reindex_fill_value(): tm.assert_series_equal(result, expected) +@td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) @pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)]) def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager): # https://github.com/pandas-dev/pandas/issues/42921 - if using_array_manager: - pytest.skip("Array manager does not promote dtype, hence we fail") - if dtype == "timedelta64[ns]" and fill_value == Timedelta(0): # use the scalar that is not compatible with the dtype for this test fill_value = Timestamp(0) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ae5543ff266ef..58bdf3666caf4 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1306,7 +1306,7 @@ def test_categorical_zeroes(self): ) tm.assert_series_equal(result, expected, check_index_type=True) - def test_dropna(self): + def test_value_counts_dropna(self): # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 tm.assert_series_equal( diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 5962d52edae3e..7d3aaf7fd3744 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1085,7 +1085,7 @@ def test_to_datetime_array_of_dt64s(self, cache, unit): # A list of datetimes where the last one is out of bounds dts_with_oob = dts + [np.datetime64("9999-01-01")] - # As of GH#?? we do not raise in this case + # As of GH#51978 we do not raise in this case to_datetime(dts_with_oob, errors="raise") result = to_datetime(dts_with_oob, errors="coerce", cache=cache)