diff --git a/pandas/_libs/algos.pyi b/pandas/_libs/algos.pyi index 9da5534c51321..60279395724ff 100644 --- a/pandas/_libs/algos.pyi +++ b/pandas/_libs/algos.pyi @@ -3,6 +3,8 @@ from typing import Any import numpy as np +from pandas._typing import npt + class Infinity: """ Provide a positive Infinity comparison method for ranking. @@ -30,7 +32,7 @@ class NegInfinity: def unique_deltas( arr: np.ndarray, # const int64_t[:] ) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1] -def is_lexsorted(list_of_arrays: list[np.ndarray]) -> bool: ... +def is_lexsorted(list_of_arrays: list[npt.NDArray[np.int64]]) -> bool: ... def groupsort_indexer( index: np.ndarray, # const int64_t[:] ngroups: int, @@ -146,18 +148,18 @@ def diff_2d( axis: int, datetimelike: bool = ..., ) -> None: ... -def ensure_platform_int(arr: object) -> np.ndarray: ... -def ensure_object(arr: object) -> np.ndarray: ... -def ensure_float64(arr: object, copy=True) -> np.ndarray: ... -def ensure_float32(arr: object, copy=True) -> np.ndarray: ... -def ensure_int8(arr: object, copy=True) -> np.ndarray: ... -def ensure_int16(arr: object, copy=True) -> np.ndarray: ... -def ensure_int32(arr: object, copy=True) -> np.ndarray: ... -def ensure_int64(arr: object, copy=True) -> np.ndarray: ... -def ensure_uint8(arr: object, copy=True) -> np.ndarray: ... -def ensure_uint16(arr: object, copy=True) -> np.ndarray: ... -def ensure_uint32(arr: object, copy=True) -> np.ndarray: ... -def ensure_uint64(arr: object, copy=True) -> np.ndarray: ... +def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ... +def ensure_object(arr: object) -> npt.NDArray[np.object_]: ... +def ensure_float64(arr: object, copy=True) -> npt.NDArray[np.float64]: ... +def ensure_float32(arr: object, copy=True) -> npt.NDArray[np.float32]: ... +def ensure_int8(arr: object, copy=True) -> npt.NDArray[np.int8]: ... +def ensure_int16(arr: object, copy=True) -> npt.NDArray[np.int16]: ... +def ensure_int32(arr: object, copy=True) -> npt.NDArray[np.int32]: ... +def ensure_int64(arr: object, copy=True) -> npt.NDArray[np.int64]: ... +def ensure_uint8(arr: object, copy=True) -> npt.NDArray[np.uint8]: ... +def ensure_uint16(arr: object, copy=True) -> npt.NDArray[np.uint16]: ... +def ensure_uint32(arr: object, copy=True) -> npt.NDArray[np.uint32]: ... +def ensure_uint64(arr: object, copy=True) -> npt.NDArray[np.uint64]: ... def take_1d_int8_int8( values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=... ) -> None: ... diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 2353c66f3378f..167fac257075c 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -122,7 +122,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr): Parameters ---------- - arr : ndarray[in64_t] + arr : ndarray[int64_t] Returns ------- diff --git a/pandas/_libs/hashing.pyi b/pandas/_libs/hashing.pyi index 2844ec9b06557..8361026e4a87d 100644 --- a/pandas/_libs/hashing.pyi +++ b/pandas/_libs/hashing.pyi @@ -1,7 +1,9 @@ import numpy as np +from pandas._typing import npt + def hash_object_array( - arr: np.ndarray, # np.ndarray[object] + arr: npt.NDArray[np.object_], key: str, encoding: str = ..., -) -> np.ndarray: ... # np.ndarray[np.uint64] +) -> npt.NDArray[np.uint64]: ... diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi index 6bb332435be63..8b09c3771d070 100644 --- a/pandas/_libs/index.pyi +++ b/pandas/_libs/index.pyi @@ -1,5 +1,7 @@ import numpy as np +from pandas._typing import npt + class IndexEngine: over_size_threshold: bool def __init__(self, vgetter, n: int): ... @@ -16,21 +18,18 @@ class IndexEngine: def is_monotonic_decreasing(self) -> bool: ... def get_backfill_indexer( self, other: np.ndarray, limit: int | None = ... - ) -> np.ndarray: ... + ) -> npt.NDArray[np.intp]: ... def get_pad_indexer( self, other: np.ndarray, limit: int | None = ... - ) -> np.ndarray: ... + ) -> npt.NDArray[np.intp]: ... @property def is_mapping_populated(self) -> bool: ... def clear_mapping(self): ... - def get_indexer(self, values: np.ndarray) -> np.ndarray: ... # np.ndarray[np.intp] + def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ... def get_indexer_non_unique( self, targets: np.ndarray, - ) -> tuple[ - np.ndarray, # np.ndarray[np.intp] - np.ndarray, # np.ndarray[np.intp] - ]: ... + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... class Float64Engine(IndexEngine): ... class Float32Engine(IndexEngine): ... @@ -58,8 +57,8 @@ class BaseMultiIndexCodesEngine: ): ... def get_indexer( self, - target: np.ndarray, # np.ndarray[object] - ) -> np.ndarray: ... # np.ndarray[np.intp] + target: npt.NDArray[np.object_], + ) -> npt.NDArray[np.intp]: ... def _extract_level_codes(self, target: object): ... def get_indexer_with_fill( self, diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4e1197c43daf7..3ba18b525a1e8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -433,7 +433,7 @@ def unique(values): unique1d = unique -def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: +def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]: """ Compute the isin boolean array. @@ -903,7 +903,7 @@ def value_counts_arraylike(values, dropna: bool): def duplicated( values: ArrayLike, keep: Literal["first", "last", False] = "first" -) -> np.ndarray: +) -> npt.NDArray[np.bool_]: """ Return boolean ndarray denoting duplicate values. @@ -1032,8 +1032,8 @@ def rank( def checked_add_with_arr( arr: np.ndarray, b, - arr_mask: np.ndarray | None = None, - b_mask: np.ndarray | None = None, + arr_mask: npt.NDArray[np.bool_] | None = None, + b_mask: npt.NDArray[np.bool_] | None = None, ) -> np.ndarray: """ Perform array addition that checks for underflow and overflow. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3402b9594e6dd..ad3120c9c27d3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -49,6 +49,7 @@ DtypeObj, NpDtype, PositionalIndexer2D, + npt, ) from pandas.compat.numpy import function as nv from pandas.errors import ( @@ -274,7 +275,7 @@ def __iter__(self): return (self._box_func(v) for v in self.asi8) @property - def asi8(self) -> np.ndarray: + def asi8(self) -> npt.NDArray[np.int64]: """ Integer representation of the values. @@ -774,7 +775,7 @@ def map(self, mapper): return Index(self).map(mapper).array - def isin(self, values) -> np.ndarray: + def isin(self, values) -> npt.NDArray[np.bool_]: """ Compute boolean array of whether each value is found in the passed set of values. @@ -830,11 +831,11 @@ def isin(self, values) -> np.ndarray: # ------------------------------------------------------------------ # Null Handling - def isna(self) -> np.ndarray: + def isna(self) -> npt.NDArray[np.bool_]: return self._isnan @property # NB: override with cache_readonly in immutable subclasses - def _isnan(self) -> np.ndarray: + def _isnan(self) -> npt.NDArray[np.bool_]: """ return if each value is nan """ @@ -1535,7 +1536,7 @@ class DatelikeOps(DatetimeLikeArrayMixin): URL="https://docs.python.org/3/library/datetime.html" "#strftime-and-strptime-behavior" ) - def strftime(self, date_format: str) -> np.ndarray: + def strftime(self, date_format: str) -> npt.NDArray[np.object_]: """ Convert to Index using specified date_format. @@ -1551,7 +1552,7 @@ def strftime(self, date_format: str) -> np.ndarray: Returns ------- - ndarray + ndarray[object] NumPy ndarray of formatted strings. See Also diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 17461ad95866e..8adc465f22f2a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -38,6 +38,7 @@ to_offset, tzconversion, ) +from pandas._typing import npt from pandas.errors import PerformanceWarning from pandas.core.dtypes.cast import astype_dt64_to_dt64tz @@ -656,7 +657,7 @@ def astype(self, dtype, copy: bool = True): @dtl.ravel_compat def _format_native_types( self, na_rep="NaT", date_format=None, **kwargs - ) -> np.ndarray: + ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_datetime64_from_values fmt = get_format_datetime64_from_values(self, date_format) @@ -1045,7 +1046,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArr # ---------------------------------------------------------------- # Conversion Methods - Vectorized analogues of Timestamp methods - def to_pydatetime(self) -> np.ndarray: + def to_pydatetime(self) -> npt.NDArray[np.object_]: """ Return Datetime Array/Index as object ndarray of datetime.datetime objects. @@ -1262,7 +1263,7 @@ def day_name(self, locale=None): return result @property - def time(self) -> np.ndarray: + def time(self) -> npt.NDArray[np.object_]: """ Returns numpy array of datetime.time. The time part of the Timestamps. """ @@ -1274,7 +1275,7 @@ def time(self) -> np.ndarray: return ints_to_pydatetime(timestamps, box="time") @property - def timetz(self) -> np.ndarray: + def timetz(self) -> npt.NDArray[np.object_]: """ Returns numpy array of datetime.time also containing timezone information. The time part of the Timestamps. @@ -1282,7 +1283,7 @@ def timetz(self) -> np.ndarray: return ints_to_pydatetime(self.asi8, self.tz, box="time") @property - def date(self) -> np.ndarray: + def date(self) -> npt.NDArray[np.object_]: """ Returns numpy array of python datetime.date objects (namely, the date part of Timestamps without timezone information). diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 45fab72669eb7..37488a8e4993e 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -947,9 +947,7 @@ def agg_series( out = npvalues return out - def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray: - # -> np.ndarray[object] - + def _aggregate_series_fast(self, obj: Series, func: F) -> npt.NDArray[np.object_]: # At this point we have already checked that # - obj.index is not a MultiIndex # - obj is backed by an ndarray, not ExtensionArray @@ -967,8 +965,9 @@ def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray: return result @final - def _aggregate_series_pure_python(self, obj: Series, func: F) -> np.ndarray: - # -> np.ndarray[object] + def _aggregate_series_pure_python( + self, obj: Series, func: F + ) -> npt.NDArray[np.object_]: ids, _, ngroups = self.group_info counts = np.zeros(ngroups, dtype=int) @@ -1190,12 +1189,12 @@ def __init__( assert isinstance(axis, int), axis @cache_readonly - def slabels(self) -> np.ndarray: # np.ndarray[np.intp] + def slabels(self) -> npt.NDArray[np.intp]: # Sorted labels return self.labels.take(self._sort_idx) @cache_readonly - def _sort_idx(self) -> np.ndarray: # np.ndarray[np.intp] + def _sort_idx(self) -> npt.NDArray[np.intp]: # Counting sort indexer return get_group_index_sorter(self.labels, self.ngroups) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 60ab99df6eec9..51467704d6fef 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4450,7 +4450,9 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: return join_index, left_indexer, right_indexer @final - def _join_monotonic(self, other: Index, how: str_t = "left"): + def _join_monotonic( + self, other: Index, how: str_t = "left" + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: # We only get here with matching dtypes assert other.dtype == self.dtype diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index ff812a667bd98..8e8ed294304c5 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -15,6 +15,7 @@ from pandas._typing import ( Dtype, DtypeObj, + npt, ) from pandas.util._decorators import ( cache_readonly, @@ -411,7 +412,7 @@ class IntegerIndex(NumericIndex): _is_backward_compat_public_numeric_index: bool = False @property - def asi8(self) -> np.ndarray: + def asi8(self) -> npt.NDArray[np.int64]: # do not cache or you'll create a memory leak warnings.warn( "Index.asi8 is deprecated and will be removed in a future version.", diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 1a07b5614eb38..9e85cbec0f299 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -23,6 +23,7 @@ ArrayLike, Axis, F, + npt, ) from pandas.compat._optional import import_optional_dependency @@ -57,7 +58,7 @@ def check_value_size(value, mask: np.ndarray, length: int): return value -def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray: +def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: """ Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index ebd7369607cc5..62abbb11ee405 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -931,17 +931,16 @@ def _maybe_add_join_keys( else: result.insert(i, name or f"key_{i}", key_col) - def _get_join_indexers(self) -> tuple[np.ndarray, np.ndarray]: + def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: """return the join indexers""" - # Both returned ndarrays are np.intp return get_join_indexers( self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how ) def _get_join_info( self, - ) -> tuple[Index, np.ndarray | None, np.ndarray | None]: - # Both returned ndarrays are np.intp (if not None) + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + left_ax = self.left.axes[self.axis] right_ax = self.right.axes[self.axis] @@ -1892,8 +1891,7 @@ def _get_merge_keys(self): return left_join_keys, right_join_keys, join_names - def _get_join_indexers(self) -> tuple[np.ndarray, np.ndarray]: - # Both returned ndarrays are np.intp + def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: """return the join indexers""" def flip(xs) -> np.ndarray: @@ -1987,8 +1985,7 @@ def flip(xs) -> np.ndarray: def _get_multiindex_indexer( join_keys, index: MultiIndex, sort: bool -) -> tuple[np.ndarray, np.ndarray]: - # Both returned ndarrays are np.intp +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: # left & right join labels and num. of levels at each location mapped = ( @@ -2026,8 +2023,7 @@ def _get_multiindex_indexer( def _get_single_indexer( join_key, index: Index, sort: bool = False -) -> tuple[np.ndarray, np.ndarray]: - # Both returned ndarrays are np.intp +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: left_key, right_key, count = _factorize_keys(join_key, index._values, sort=sort) return libjoin.left_outer_join(left_key, right_key, count, sort=sort) @@ -2035,8 +2031,7 @@ def _get_single_indexer( def _left_join_on_index( left_ax: Index, right_ax: Index, join_keys, sort: bool = False -) -> tuple[Index, np.ndarray | None, np.ndarray]: - # Both returned ndarrays are np.intp (if not None) +) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp]]: if len(join_keys) > 1: if not ( isinstance(right_ax, MultiIndex) and len(join_keys) == right_ax.nlevels @@ -2205,8 +2200,7 @@ def _factorize_keys( def _sort_labels( uniques: np.ndarray, left: np.ndarray, right: np.ndarray -) -> tuple[np.ndarray, np.ndarray]: - # Both returned ndarrays are np.intp +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: llength = len(left) labels = np.concatenate([left, right]) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 43a22a44d8300..c2d7f7b3f716c 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -27,6 +27,7 @@ to_offset, ) from pandas._libs.tslibs.parsing import get_rule_month +from pandas._typing import npt from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -228,11 +229,11 @@ def __init__(self, index, warn: bool = True): ) @cache_readonly - def deltas(self): + def deltas(self) -> npt.NDArray[np.int64]: return unique_deltas(self.i8values) @cache_readonly - def deltas_asi8(self): + def deltas_asi8(self) -> npt.NDArray[np.int64]: # NB: we cannot use self.i8values here because we may have converted # the tz in __init__ return unique_deltas(self.index.asi8) @@ -300,7 +301,7 @@ def hour_deltas(self): return [x / _ONE_HOUR for x in self.deltas] @cache_readonly - def fields(self): + def fields(self) -> np.ndarray: # structured array of fields return build_field_sarray(self.i8values) @cache_readonly @@ -311,12 +312,12 @@ def month_position_check(self): return month_position_check(self.fields, self.index.dayofweek) @cache_readonly - def mdiffs(self): + def mdiffs(self) -> npt.NDArray[np.int64]: nmonths = self.fields["Y"] * 12 + self.fields["M"] return unique_deltas(nmonths.astype("i8")) @cache_readonly - def ydiffs(self): + def ydiffs(self) -> npt.NDArray[np.int64]: return unique_deltas(self.fields["Y"].astype("i8")) def _infer_daily_rule(self) -> str | None: