diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 3391e2760187c..872f17b7f0770 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -1,7 +1,7 @@
 import operator
 from operator import le, lt
 import textwrap
-from typing import Sequence, Type, TypeVar
+from typing import Optional, Sequence, Type, TypeVar, cast

 import numpy as np

@@ -14,7 +14,7 @@
     intervals_to_interval_bounds,
 )
 from pandas._libs.missing import NA
-from pandas._typing import ArrayLike
+from pandas._typing import ArrayLike, Dtype, NpDtype
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import Appender

@@ -170,7 +170,7 @@ def __new__(
         cls,
         data,
         closed=None,
-        dtype=None,
+        dtype: Optional[Dtype] = None,
         copy: bool = False,
         verify_integrity: bool = True,
     ):
@@ -212,7 +212,13 @@ def __new__(

     @classmethod
     def _simple_new(
-        cls, left, right, closed=None, copy=False, dtype=None, verify_integrity=True
+        cls,
+        left,
+        right,
+        closed=None,
+        copy=False,
+        dtype: Optional[Dtype] = None,
+        verify_integrity=True,
     ):
         result = IntervalMixin.__new__(cls)

@@ -223,12 +229,14 @@ def _simple_new(
         if dtype is not None:
             # GH 19262: dtype must be an IntervalDtype to override inferred
             dtype = pandas_dtype(dtype)
-            if not is_interval_dtype(dtype):
+            if is_interval_dtype(dtype):
+                dtype = cast(IntervalDtype, dtype)
+                if dtype.subtype is not None:
+                    left = left.astype(dtype.subtype)
+                    right = right.astype(dtype.subtype)
+            else:
                 msg = f"dtype must be an IntervalDtype, got {dtype}"
                 raise TypeError(msg)
-            elif dtype.subtype is not None:
-                left = left.astype(dtype.subtype)
-                right = right.astype(dtype.subtype)

         # coerce dtypes to match if needed
         if is_float_dtype(left) and is_integer_dtype(right):
@@ -279,7 +287,9 @@ def _simple_new(
         return result

     @classmethod
-    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
+    def _from_sequence(
+        cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False
+    ):
         return cls(scalars, dtype=dtype, copy=copy)

     @classmethod
@@ -338,7 +348,9 @@ def _from_factorized(cls, values, original):
             ),
         }
     )
-    def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
+    def from_breaks(
+        cls, breaks, closed="right", copy: bool = False, dtype: Optional[Dtype] = None
+    ):
         breaks = maybe_convert_platform_interval(breaks)

         return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype)
@@ -407,7 +419,9 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
             ),
         }
     )
-    def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
+    def from_arrays(
+        cls, left, right, closed="right", copy=False, dtype: Optional[Dtype] = None
+    ):
         left = maybe_convert_platform_interval(left)
         right = maybe_convert_platform_interval(right)

@@ -464,7 +478,9 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
             ),
         }
     )
-    def from_tuples(cls, data, closed="right", copy=False, dtype=None):
+    def from_tuples(
+        cls, data, closed="right", copy=False, dtype: Optional[Dtype] = None
+    ):
         if len(data):
             left, right = [], []
         else:
@@ -1277,7 +1293,7 @@ def is_non_overlapping_monotonic(self):
     # ---------------------------------------------------------------------
     # Conversion

-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
         """
         Return the IntervalArray's data as a numpy array of Interval objects
         (with dtype='object')
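Aside (illustrative sketch, not part of the patch): the `Dtype` values the annotated IntervalArray constructors accept include dtype strings and IntervalDtype instances, and the `_simple_new` hunk above is what casts the bounds to the requested subtype.

import numpy as np
import pandas as pd

# Integer breaks plus an explicit interval dtype: _simple_new casts the
# left/right bounds to float64 via dtype.subtype.
arr = pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3], dtype="interval[float64]")
same = pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3], dtype=pd.IntervalDtype("float64"))

# __array__ (an NpDtype argument) still produces an object ndarray of Interval scalars.
objs = np.asarray(arr)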
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 3cf25847ed3d0..e4a98a54ee94c 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -5,7 +5,7 @@
 import numpy as np

 from pandas._libs import lib, missing as libmissing
-from pandas._typing import ArrayLike, Dtype, Scalar
+from pandas._typing import ArrayLike, Dtype, NpDtype, Scalar
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly, doc

@@ -147,7 +147,10 @@ def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
         return type(self)(~self._data, self._mask)

     def to_numpy(
-        self, dtype=None, copy: bool = False, na_value: Scalar = lib.no_default
+        self,
+        dtype: Optional[NpDtype] = None,
+        copy: bool = False,
+        na_value: Scalar = lib.no_default,
     ) -> np.ndarray:
         """
         Convert to a NumPy Array.
@@ -257,7 +260,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:

     __array_priority__ = 1000  # higher than ndarray so ops dispatch to us

-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
         """
         the array interface, return my values
         We return an object array here to preserve our scalar values
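Aside (illustrative sketch, not part of the patch): on a masked (nullable) array, `to_numpy` takes anything the `NpDtype` alias covers -- a dtype string, a `np.dtype`, or None -- together with an `na_value` to substitute for missing entries.

import numpy as np
import pandas as pd

arr = pd.array([1, 2, None], dtype="Int64")               # IntegerArray, a BaseMaskedArray
floats = arr.to_numpy(dtype="float64", na_value=np.nan)   # -> [1.0, 2.0, nan]
objs = np.asarray(arr)                                    # __array__: object ndarray keeping pd.NA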
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index ae131d8a51ba1..9ed6306e5b9bc 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -1,11 +1,11 @@
 import numbers
-from typing import Tuple, Type, Union
+from typing import Optional, Tuple, Type, Union

 import numpy as np
 from numpy.lib.mixins import NDArrayOperatorsMixin

 from pandas._libs import lib
-from pandas._typing import Scalar
+from pandas._typing import Dtype, NpDtype, Scalar
 from pandas.compat.numpy import function as nv

 from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -38,7 +38,7 @@ class PandasDtype(ExtensionDtype):

     _metadata = ("_dtype",)

-    def __init__(self, dtype: object):
+    def __init__(self, dtype: Optional[NpDtype]):
         self._dtype = np.dtype(dtype)

     def __repr__(self) -> str:
@@ -173,7 +173,7 @@ def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False)

     @classmethod
     def _from_sequence(
-        cls, scalars, *, dtype=None, copy: bool = False
+        cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False
     ) -> "PandasArray":
         if isinstance(dtype, PandasDtype):
             dtype = dtype._dtype
@@ -200,7 +200,7 @@ def dtype(self) -> PandasDtype:
     # ------------------------------------------------------------------------
     # NumPy Array Interface

-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
         return np.asarray(self._ndarray, dtype=dtype)

     _HANDLED_TYPES = (np.ndarray, numbers.Number)
@@ -311,7 +311,15 @@ def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar:
         )
         return self._wrap_reduction_result(axis, result)

-    def mean(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
+    def mean(
+        self,
+        *,
+        axis=None,
+        dtype: Optional[NpDtype] = None,
+        out=None,
+        keepdims=False,
+        skipna=True,
+    ):
         nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
         result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
         return self._wrap_reduction_result(axis, result)
@@ -326,7 +334,14 @@ def median(
         return self._wrap_reduction_result(axis, result)

     def std(
-        self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
+        self,
+        *,
+        axis=None,
+        dtype: Optional[NpDtype] = None,
+        out=None,
+        ddof=1,
+        keepdims=False,
+        skipna=True,
     ):
         nv.validate_stat_ddof_func(
             (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
@@ -335,7 +350,14 @@ def std(
         return self._wrap_reduction_result(axis, result)

     def var(
-        self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
+        self,
+        *,
+        axis=None,
+        dtype: Optional[NpDtype] = None,
+        out=None,
+        ddof=1,
+        keepdims=False,
+        skipna=True,
     ):
         nv.validate_stat_ddof_func(
             (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
@@ -344,7 +366,14 @@ def var(
         return self._wrap_reduction_result(axis, result)

     def sem(
-        self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
+        self,
+        *,
+        axis=None,
+        dtype: Optional[NpDtype] = None,
+        out=None,
+        ddof=1,
+        keepdims=False,
+        skipna=True,
     ):
         nv.validate_stat_ddof_func(
             (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
@@ -352,14 +381,30 @@ def sem(
         result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
         return self._wrap_reduction_result(axis, result)

-    def kurt(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
+    def kurt(
+        self,
+        *,
+        axis=None,
+        dtype: Optional[NpDtype] = None,
+        out=None,
+        keepdims=False,
+        skipna=True,
+    ):
         nv.validate_stat_ddof_func(
             (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
         )
         result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
         return self._wrap_reduction_result(axis, result)

-    def skew(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
+    def skew(
+        self,
+        *,
+        axis=None,
+        dtype: Optional[NpDtype] = None,
+        out=None,
+        keepdims=False,
+        skipna=True,
+    ):
         nv.validate_stat_ddof_func(
             (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
         )
@@ -370,7 +415,10 @@ def skew(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
     # Additional Methods

     def to_numpy(
-        self, dtype=None, copy: bool = False, na_value=lib.no_default
+        self,
+        dtype: Optional[NpDtype] = None,
+        copy: bool = False,
+        na_value=lib.no_default,
     ) -> np.ndarray:
         result = np.asarray(self._ndarray, dtype=dtype)

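Aside (illustrative sketch, not part of the patch): `PandasDtype.__init__` now advertises the same inputs `np.dtype()` itself takes, and the reduction signatures keep `dtype`/`out`/`keepdims` only for numpy compatibility -- the `nv.validate_*` calls reject non-default values.

import numpy as np
import pandas as pd

parr = pd.arrays.PandasArray(np.array([1.0, 2.0, 3.0]))
print(parr.dtype)                    # PandasDtype('float64'), built via np.dtype(...)
print(parr.mean())                   # 2.0; passing a non-default dtype=/out= raises
print(parr.to_numpy(dtype="int64"))  # [1 2 3]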
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index e0e40a666896d..e06315fbd4f78 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -26,7 +26,7 @@
     get_period_field_arr,
     period_asfreq_arr,
 )
-from pandas._typing import AnyArrayLike, Dtype
+from pandas._typing import AnyArrayLike, Dtype, NpDtype
 from pandas.util._decorators import cache_readonly, doc

 from pandas.core.dtypes.common import (
@@ -159,7 +159,7 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps):
     # --------------------------------------------------------------------
     # Constructors

-    def __init__(self, values, dtype=None, freq=None, copy=False):
+    def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False):
         freq = validate_dtype_freq(dtype, freq)

         if freq is not None:
@@ -186,7 +186,10 @@ def __init__(self, values, dtype=None, freq=None, copy=False):

     @classmethod
     def _simple_new(
-        cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
+        cls,
+        values: np.ndarray,
+        freq: Optional[BaseOffset] = None,
+        dtype: Optional[Dtype] = None,
     ) -> "PeriodArray":
         # alias for PeriodArray.__init__
         assertion_msg = "Should be numpy array of type i8"
@@ -220,7 +223,7 @@ def _from_sequence(

     @classmethod
     def _from_sequence_of_strings(
-        cls, strings, *, dtype=None, copy=False
+        cls, strings, *, dtype: Optional[Dtype] = None, copy=False
     ) -> "PeriodArray":
         return cls._from_sequence(strings, dtype=dtype, copy=copy)

@@ -301,7 +304,7 @@ def freq(self) -> BaseOffset:
         """
         return self.dtype.freq

-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
         if dtype == "i8":
             return self.asi8
         elif dtype == bool:
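Aside (illustrative sketch, not part of the patch): `PeriodArray.__array__` special-cases the requested numpy dtype, as the hunk above shows -- "i8" yields the ordinal integers, while the remaining branches below the hunk handle bool and the default object path.

import numpy as np
import pandas as pd

parr = pd.array(["2021-01", "2021-02", "2021-03"], dtype="period[M]")  # PeriodArray
ordinals = np.asarray(parr, dtype="i8")  # the "i8" branch, equivalent to parr.asi8
objects = np.asarray(parr)               # object ndarray of Period scalars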
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 26dbe5e0dba44..b4d4fd5cc7106 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -4,7 +4,7 @@
 from collections import abc
 import numbers
 import operator
-from typing import Any, Callable, Sequence, Type, TypeVar, Union
+from typing import Any, Callable, Optional, Sequence, Type, TypeVar, Union
 import warnings

 import numpy as np
@@ -13,7 +13,7 @@
 import pandas._libs.sparse as splib
 from pandas._libs.sparse import BlockIndex, IntIndex, SparseIndex
 from pandas._libs.tslibs import NaT
-from pandas._typing import Scalar
+from pandas._typing import Dtype, NpDtype, Scalar
 from pandas.compat.numpy import function as nv
 from pandas.errors import PerformanceWarning

@@ -174,7 +174,7 @@ def _sparse_array_op(
     return _wrap_result(name, result, index, fill, dtype=result_dtype)


-def _wrap_result(name, data, sparse_index, fill_value, dtype=None):
+def _wrap_result(name, data, sparse_index, fill_value, dtype: Optional[Dtype] = None):
     """
     wrap op result to have correct dtype
     """
@@ -281,7 +281,7 @@ def __init__(
         index=None,
         fill_value=None,
         kind="integer",
-        dtype=None,
+        dtype: Optional[Dtype] = None,
         copy=False,
     ):

@@ -454,7 +454,7 @@ def from_spmatrix(cls, data):

         return cls._simple_new(arr, index, dtype)

-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
         fill_value = self.fill_value

         if self.sp_index.ngaps == 0:
@@ -487,7 +487,7 @@ def __setitem__(self, key, value):
         raise TypeError(msg)

     @classmethod
-    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
+    def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False):
         return cls(scalars, dtype=dtype)

     @classmethod
@@ -998,7 +998,7 @@ def _concat_same_type(

         return cls(data, sparse_index=sp_index, fill_value=fill_value)

-    def astype(self, dtype=None, copy=True):
+    def astype(self, dtype: Optional[Dtype] = None, copy=True):
         """
         Change the dtype of a SparseArray.

@@ -1461,7 +1461,9 @@ def _formatter(self, boxed=False):
         return None


-def make_sparse(arr: np.ndarray, kind="block", fill_value=None, dtype=None):
+def make_sparse(
+    arr: np.ndarray, kind="block", fill_value=None, dtype: Optional[NpDtype] = None
+):
     """
     Convert ndarray to sparse format

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 74a41a0b64ff8..3d0ac3380ec39 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -1,9 +1,9 @@
-from typing import TYPE_CHECKING, Type, Union
+from typing import TYPE_CHECKING, Optional, Type, Union

 import numpy as np

 from pandas._libs import lib, missing as libmissing
-from pandas._typing import Scalar
+from pandas._typing import Dtype, Scalar
 from pandas.compat.numpy import function as nv

 from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype
@@ -206,7 +206,7 @@ def _validate(self):
         )

     @classmethod
-    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
+    def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False):
         if dtype:
             assert dtype == "string"

@@ -234,7 +234,9 @@ def _from_sequence(cls, scalars, *, dtype=None, copy=False):
         return new_string_array

     @classmethod
-    def _from_sequence_of_strings(cls, strings, *, dtype=None, copy=False):
+    def _from_sequence_of_strings(
+        cls, strings, *, dtype: Optional[Dtype] = None, copy=False
+    ):
         return cls._from_sequence(strings, dtype=dtype, copy=copy)

     def __arrow_array__(self, type=None):
@@ -381,7 +383,7 @@ def _cmp_method(self, other, op):
     # String methods interface
     _str_na_value = StringDtype.na_value

-    def _str_map(self, f, na_value=None, dtype=None):
+    def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
         from pandas.arrays import BooleanArray, IntegerArray, StringArray
         from pandas.core.arrays.string_ import StringDtype
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 3a351bf497662..d37e91e55a9cf 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -1,11 +1,12 @@
 from __future__ import annotations

 from distutils.version import LooseVersion
-from typing import TYPE_CHECKING, Any, Sequence, Type, Union
+from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, Union

 import numpy as np

 from pandas._libs import lib, missing as libmissing
+from pandas._typing import Dtype, NpDtype
 from pandas.util._validators import validate_fillna_kwargs

 from pandas.core.dtypes.base import ExtensionDtype
@@ -203,14 +204,16 @@ def _chk_pyarrow_available(cls) -> None:
             raise ImportError(msg)

     @classmethod
-    def _from_sequence(cls, scalars, dtype=None, copy=False):
+    def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False):
         cls._chk_pyarrow_available()
         # convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value
         scalars = lib.ensure_string_array(scalars, copy=False)
         return cls(pa.array(scalars, type=pa.string(), from_pandas=True))

     @classmethod
-    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
+    def _from_sequence_of_strings(
+        cls, strings, dtype: Optional[Dtype] = None, copy=False
+    ):
         return cls._from_sequence(strings, dtype=dtype, copy=copy)

     @property
@@ -220,7 +223,7 @@ def dtype(self) -> ArrowStringDtype:
         """
         return self._dtype

-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
         """Correctly construct numpy arrays when passed to `np.asarray()`."""
         return self.to_numpy(dtype=dtype)

@@ -229,7 +232,10 @@ def __arrow_array__(self, type=None):
         return self._data

     def to_numpy(
-        self, dtype=None, copy: bool = False, na_value=lib.no_default
+        self,
+        dtype: Optional[NpDtype] = None,
+        copy: bool = False,
+        na_value=lib.no_default,
     ) -> np.ndarray:
         """
         Convert to a NumPy ndarray.
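Aside (illustrative sketch, not part of the patch): for the string arrays the `dtype` argument is narrow in practice -- `StringArray._from_sequence` asserts it is "string", and the pyarrow-backed `_from_sequence` shown above does not use it -- while `to_numpy` keeps the free-form `NpDtype` plus `na_value`.

import pandas as pd

sarr = pd.array(["a", "b", None], dtype="string")   # StringArray
print(sarr.to_numpy(dtype=object, na_value=None))   # ['a' 'b' None]
# The pyarrow-backed ArrowStringArray exposes the same to_numpy signature,
# provided pyarrow is installed.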
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 55136e0dedcf5..62d5a4d30563b 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -22,6 +22,7 @@
     ints_to_pytimedelta,
     parse_timedelta_unit,
 )
+from pandas._typing import NpDtype
 from pandas.compat.numpy import function as nv

 from pandas.core.dtypes.cast import astype_td64_unit_conversion
@@ -352,7 +353,7 @@ def sum(
         self,
         *,
         axis=None,
-        dtype=None,
+        dtype: Optional[NpDtype] = None,
         out=None,
         keepdims: bool = False,
         initial=None,
@@ -372,7 +373,7 @@ def std(
         self,
         *,
         axis=None,
-        dtype=None,
+        dtype: Optional[NpDtype] = None,
         out=None,
         ddof: int = 1,
         keepdims: bool = False,