From 2b5cbd00e423f4643e4dcac1f2903e194d2f1d38 Mon Sep 17 00:00:00 2001
From: jreback
Date: Mon, 1 Feb 2021 07:29:44 +0000
Subject: [PATCH] ⬆️ UPGRADE: Autoupdate pre-commit config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

--- .pre-commit-config.yaml | 4 +- pandas/_testing/__init__.py | 32 +-- pandas/compat/pickle_compat.py | 2 +- pandas/core/aggregation.py | 26 +- pandas/core/algorithms.py | 16 +- pandas/core/apply.py | 22 +- pandas/core/arrays/_mixins.py | 8 +- pandas/core/arrays/base.py | 24 +- pandas/core/arrays/boolean.py | 20 +- pandas/core/arrays/categorical.py | 16 +- pandas/core/arrays/datetimelike.py | 46 +-- pandas/core/arrays/datetimes.py | 20 +- pandas/core/arrays/floating.py | 8 +- pandas/core/arrays/integer.py | 14 +- pandas/core/arrays/interval.py | 16 +- pandas/core/arrays/masked.py | 20 +- pandas/core/arrays/numeric.py | 4 +- pandas/core/arrays/numpy_.py | 22 +- pandas/core/arrays/period.py | 28 +- pandas/core/arrays/sparse/array.py | 18 +- pandas/core/arrays/sparse/dtype.py | 6 +- pandas/core/arrays/string_.py | 14 +- pandas/core/arrays/string_arrow.py | 16 +- pandas/core/arrays/timedeltas.py | 12 +- pandas/core/computation/align.py | 10 +- pandas/core/computation/ops.py | 4 +- pandas/core/computation/pytables.py | 16 +- pandas/core/computation/scope.py | 2 +- pandas/core/construction.py | 24 +- pandas/core/describe.py | 18 +- pandas/core/dtypes/base.py | 20 +- pandas/core/dtypes/cast.py | 28 +- pandas/core/dtypes/dtypes.py | 54 ++-- pandas/core/frame.py | 270 +++++++++--------- pandas/core/generic.py | 188 ++++++------ pandas/core/groupby/generic.py | 30 +- pandas/core/groupby/groupby.py | 58 ++-- pandas/core/groupby/grouper.py | 16 +- pandas/core/groupby/ops.py | 34 +-- pandas/core/indexes/base.py | 50 ++-- pandas/core/indexes/datetimes.py | 10 +- pandas/core/indexes/interval.py | 24 +- pandas/core/indexes/multi.py | 34 +-- pandas/core/indexes/period.py | 4 +- pandas/core/indexes/range.py | 12 +- pandas/core/indexing.py | 32 +-- pandas/core/internals/array_manager.py | 34 +-- pandas/core/internals/blocks.py | 112 ++++---- pandas/core/internals/concat.py | 16 +- pandas/core/internals/construction.py | 62 ++-- pandas/core/internals/managers.py | 58 ++-- pandas/core/internals/ops.py | 6 +- pandas/core/missing.py | 18 +- pandas/core/ops/__init__.py | 6 +- pandas/core/resample.py | 20 +- pandas/core/reshape/concat.py | 16 +- pandas/core/reshape/melt.py | 2 +- pandas/core/reshape/merge.py | 58 ++-- pandas/core/reshape/pivot.py | 20 +- pandas/core/reshape/reshape.py | 12 +- pandas/core/series.py | 52 ++-- pandas/core/sorting.py | 20 +- pandas/core/tools/datetimes.py | 54 ++-- pandas/core/window/ewm.py | 26 +- pandas/core/window/rolling.py | 48 ++-- pandas/io/common.py | 30 +- pandas/io/excel/_base.py | 8 +- pandas/io/excel/_openpyxl.py | 14 +- pandas/io/formats/csvs.py | 42 +-- pandas/io/formats/format.py | 248 ++++++++-------- pandas/io/formats/info.py | 46 +-- pandas/io/formats/style.py | 90 +++--- pandas/io/gbq.py | 28 +- pandas/io/json/_normalize.py | 22 +- pandas/io/orc.py | 2 +- pandas/io/parquet.py | 20 +- pandas/io/pytables.py | 226 +++++++-------- pandas/io/sas/sasreader.py | 24 +- pandas/io/stata.py | 150 +++++----- pandas/plotting/_core.py | 30 +- pandas/plotting/_matplotlib/__init__.py | 2 +- pandas/plotting/_matplotlib/core.py | 16 +- pandas/plotting/_matplotlib/misc.py | 18 +- pandas/plotting/_matplotlib/style.py | 34 +-- pandas/plotting/_matplotlib/timeseries.py | 4
+- pandas/plotting/_matplotlib/tools.py | 10 +- pandas/tests/extension/arrow/arrays.py | 4 +- .../tests/extension/arrow/test_timestamp.py | 2 +- pandas/tests/extension/decimal/array.py | 2 +- pandas/tests/extension/json/array.py | 2 +- pandas/tests/extension/list/array.py | 2 +- pandas/tests/plotting/common.py | 2 +- 92 files changed, 1535 insertions(+), 1535 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d0940ce8be992..a00f3d039389c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v2.7.4 + rev: v2.8.0 hooks: - id: pyupgrade args: [--py37-plus] @@ -192,6 +192,6 @@ repos: files: ^pandas/ exclude: ^pandas/tests/ - repo: https://github.com/MarcoGorelli/no-string-hints - rev: v0.1.6 + rev: v0.1.7 hooks: - id: no-string-hints diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0b2be53131af6..5f7d1dfe1571d 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -107,24 +107,24 @@ _N = 30 _K = 4 -UNSIGNED_INT_DTYPES: List[Dtype] = ["uint8", "uint16", "uint32", "uint64"] -UNSIGNED_EA_INT_DTYPES: List[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"] -SIGNED_INT_DTYPES: List[Dtype] = [int, "int8", "int16", "int32", "int64"] -SIGNED_EA_INT_DTYPES: List[Dtype] = ["Int8", "Int16", "Int32", "Int64"] +UNSIGNED_INT_DTYPES: list[Dtype] = ["uint8", "uint16", "uint32", "uint64"] +UNSIGNED_EA_INT_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"] +SIGNED_INT_DTYPES: list[Dtype] = [int, "int8", "int16", "int32", "int64"] +SIGNED_EA_INT_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"] ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES -FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"] -FLOAT_EA_DTYPES: List[Dtype] = ["Float32", "Float64"] -COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"] -STRING_DTYPES: List[Dtype] = [str, "str", "U"] +FLOAT_DTYPES: list[Dtype] = [float, "float32", "float64"] +FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"] +COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"] +STRING_DTYPES: list[Dtype] = [str, "str", "U"] -DATETIME64_DTYPES: List[Dtype] = ["datetime64[ns]", "M8[ns]"] -TIMEDELTA64_DTYPES: List[Dtype] = ["timedelta64[ns]", "m8[ns]"] +DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"] +TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"] -BOOL_DTYPES: List[Dtype] = [bool, "bool"] -BYTES_DTYPES: List[Dtype] = [bytes, "bytes"] -OBJECT_DTYPES: List[Dtype] = [object, "object"] +BOOL_DTYPES: list[Dtype] = [bool, "bool"] +BYTES_DTYPES: list[Dtype] = [bytes, "bytes"] +OBJECT_DTYPES: list[Dtype] = [object, "object"] ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES ALL_NUMPY_DTYPES = ( @@ -417,7 +417,7 @@ def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]: ---------- k: length of each of the index instances """ - make_index_funcs: List[Callable[..., Index]] = [ + make_index_funcs: list[Callable[..., Index]] = [ makeDateIndex, makePeriodIndex, makeTimedeltaIndex, @@ -865,7 +865,7 @@ def skipna_wrapper(x): return skipna_wrapper -def convert_rows_list_to_csv_str(rows_list: List[str]): +def convert_rows_list_to_csv_str(rows_list: list[str]): """ Convert list of CSV rows to single CSV-formatted string for current OS. 
@@ -885,7 +885,7 @@ def convert_rows_list_to_csv_str(rows_list: List[str]): return sep.join(rows_list) + sep -def external_error_raised(expected_exception: Type[Exception]) -> ContextManager: +def external_error_raised(expected_exception: type[Exception]) -> ContextManager: """ Helper function to mark pytest.raises that have an external error message. diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index e6940d78dbaa2..5456d30a468d5 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -229,7 +229,7 @@ def load_newobj_ex(self): pass -def load(fh, encoding: Optional[str] = None, is_verbose: bool = False): +def load(fh, encoding: str | None = None, is_verbose: bool = False): """ Load a pickle, with a provided encoding, diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 5c99f783c70d9..a1b325ba64274 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -47,8 +47,8 @@ def reconstruct_func( - func: Optional[AggFuncType], **kwargs -) -> Tuple[bool, Optional[AggFuncType], Optional[List[str]], Optional[List[int]]]: + func: AggFuncType | None, **kwargs +) -> tuple[bool, AggFuncType | None, list[str] | None, list[int] | None]: """ This is the internal function to reconstruct func given if there is relabeling or not and also normalize the keyword to get new order of columns. @@ -86,8 +86,8 @@ def reconstruct_func( (False, 'min', None, None) """ relabeling = func is None and is_multi_agg_with_relabel(**kwargs) - columns: Optional[List[str]] = None - order: Optional[List[int]] = None + columns: list[str] | None = None + order: list[int] | None = None if not relabeling: if isinstance(func, list) and len(func) > len(set(func)): @@ -134,7 +134,7 @@ def is_multi_agg_with_relabel(**kwargs) -> bool: ) -def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[int]]: +def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[int]]: """ Normalize user-provided "named aggregation" kwargs. Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs @@ -190,8 +190,8 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i def _make_unique_kwarg_list( - seq: Sequence[Tuple[Any, Any]] -) -> Sequence[Tuple[Any, Any]]: + seq: Sequence[tuple[Any, Any]] +) -> Sequence[tuple[Any, Any]]: """ Uniquify aggfunc name of the pairs in the order list @@ -295,10 +295,10 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any: def relabel_result( result: FrameOrSeries, - func: Dict[str, List[Union[Callable, str]]], + func: dict[str, list[Callable | str]], columns: Iterable[Hashable], order: Iterable[int], -) -> Dict[Hashable, Series]: +) -> dict[Hashable, Series]: """ Internal function to reorder result if relabelling is True for dataframe.agg, and return the reordered result in dict. @@ -325,7 +325,7 @@ def relabel_result( reordered_indexes = [ pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1]) ] - reordered_result_in_dict: Dict[Hashable, Series] = {} + reordered_result_in_dict: dict[Hashable, Series] = {} idx = 0 reorder_mask = not isinstance(result, ABCSeries) and len(result.columns) > 1 @@ -369,7 +369,7 @@ def relabel_result( def validate_func_kwargs( kwargs: dict, -) -> Tuple[List[str], List[Union[str, Callable[..., Any]]]]: +) -> tuple[list[str], list[str | Callable[..., Any]]]: """ Validates types of user-provided "named aggregation" kwargs. `TypeError` is raised if aggfunc is not `str` or callable. 
@@ -495,7 +495,7 @@ def transform_dict_like( # GH 15931 - deprecation of renaming keys raise SpecificationError("nested renamer is not supported") - results: Dict[Hashable, FrameOrSeriesUnion] = {} + results: dict[Hashable, FrameOrSeriesUnion] = {} for name, how in func.items(): colg = obj._gotitem(name, ndim=1) try: @@ -536,7 +536,7 @@ def transform_str_or_callable( def agg_list_like( obj: AggObjType, - arg: List[AggFuncTypeBase], + arg: list[AggFuncTypeBase], _axis: int, ) -> FrameOrSeriesUnion: """ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3927fb04187f5..8851fde012d19 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -70,15 +70,15 @@ from pandas import Categorical, DataFrame, Index, Series from pandas.core.arrays import DatetimeArray, TimedeltaArray -_shared_docs: Dict[str, str] = {} +_shared_docs: dict[str, str] = {} # --------------- # # dtype access # # --------------- # def _ensure_data( - values: ArrayLike, dtype: Optional[DtypeObj] = None -) -> Tuple[np.ndarray, DtypeObj]: + values: ArrayLike, dtype: DtypeObj | None = None +) -> tuple[np.ndarray, DtypeObj]: """ routine to ensure that our data is of the correct input dtype for lower-level routines @@ -495,7 +495,7 @@ def f(c, v): def factorize_array( values: np.ndarray, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """ Factorize an array-like to codes and uniques. @@ -558,9 +558,9 @@ def factorize_array( def factorize( values, sort: bool = False, - na_sentinel: Optional[int] = -1, - size_hint: Optional[int] = None, -) -> Tuple[np.ndarray, Union[np.ndarray, Index]]: + na_sentinel: int | None = -1, + size_hint: int | None = None, +) -> tuple[np.ndarray, np.ndarray | Index]: """ Encode the object as an enumerated type or categorical variable. @@ -2052,7 +2052,7 @@ def safe_sort( na_sentinel: int = -1, assume_unique: bool = False, verify: bool = True, -) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: +) -> np.ndarray | tuple[np.ndarray, np.ndarray]: """ Sort ``values`` and reorder corresponding ``codes``. diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 8207f4d6e33d4..0635cba190b6a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -58,13 +58,13 @@ def frame_apply( func: AggFuncType, axis: Axis = 0, raw: bool = False, - result_type: Optional[str] = None, + result_type: str | None = None, args=None, kwds=None, ) -> FrameApply: """ construct and return a row or column based frame apply object """ axis = obj._get_axis_number(axis) - klass: Type[FrameApply] + klass: type[FrameApply] if axis == 0: klass = FrameRowApply elif axis == 1: @@ -104,7 +104,7 @@ def __init__( obj: AggObjType, func, raw: bool, - result_type: Optional[str], + result_type: str | None, args, kwds, ): @@ -144,7 +144,7 @@ def index(self) -> Index: def apply(self) -> FrameOrSeriesUnion: pass - def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]: + def agg(self) -> tuple[FrameOrSeriesUnion | None, bool | None]: """ Provide an implementation for the aggregators. @@ -188,7 +188,7 @@ def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]: # caller can react return result, True - def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]: + def maybe_apply_str(self) -> FrameOrSeriesUnion | None: """ Compute apply in case of a string. 
@@ -212,7 +212,7 @@ def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]: raise ValueError(f"Operation {f} does not support axis=1") return self.obj._try_aggregate_string_function(f, *self.args, **self.kwds) - def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]: + def maybe_apply_multiple(self) -> FrameOrSeriesUnion | None: """ Compute apply in case of a list-like or dict-like. @@ -411,7 +411,7 @@ def apply_standard(self): # wrap results return self.wrap_results(results, res_index) - def apply_series_generator(self) -> Tuple[ResType, Index]: + def apply_series_generator(self) -> tuple[ResType, Index]: assert callable(self.f) series_gen = self.series_generator @@ -664,11 +664,11 @@ def apply_standard(self) -> FrameOrSeriesUnion: class GroupByApply(Apply): - obj: Union[SeriesGroupBy, DataFrameGroupBy] + obj: SeriesGroupBy | DataFrameGroupBy def __init__( self, - obj: Union[SeriesGroupBy, DataFrameGroupBy], + obj: SeriesGroupBy | DataFrameGroupBy, func: AggFuncType, args, kwds, @@ -690,11 +690,11 @@ def apply(self): class ResamplerWindowApply(Apply): axis = 0 - obj: Union[Resampler, BaseWindow] + obj: Resampler | BaseWindow def __init__( self, - obj: Union[Resampler, BaseWindow], + obj: Resampler | BaseWindow, func: AggFuncType, args, kwds, diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 06b46c50e9467..1d3c4fbb84b0b 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -194,7 +194,7 @@ def unique(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT: @classmethod @doc(ExtensionArray._concat_same_type) def _concat_same_type( - cls: Type[NDArrayBackedExtensionArrayT], + cls: type[NDArrayBackedExtensionArrayT], to_concat: Sequence[NDArrayBackedExtensionArrayT], axis: int = 0, ) -> NDArrayBackedExtensionArrayT: @@ -236,8 +236,8 @@ def _validate_setitem_value(self, value): return value def __getitem__( - self: NDArrayBackedExtensionArrayT, key: Union[int, slice, np.ndarray] - ) -> Union[NDArrayBackedExtensionArrayT, Any]: + self: NDArrayBackedExtensionArrayT, key: int | slice | np.ndarray + ) -> NDArrayBackedExtensionArrayT | Any: if lib.is_integer(key): # fast-path result = self._ndarray[key] @@ -296,7 +296,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): msg = f"'{type(self).__name__}' does not implement reduction '{name}'" raise TypeError(msg) - def _wrap_reduction_result(self, axis: Optional[int], result): + def _wrap_reduction_result(self, axis: int | None, result): if axis is None or self.ndim == 1: return self._box_func(result) return self._from_backing_data(result) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a62a5ec4ec7f7..19ef4d19440dc 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -49,7 +49,7 @@ from pandas.core.missing import get_fill_func from pandas.core.sorting import nargminmax, nargsort -_extension_array_shared_docs: Dict[str, str] = {} +_extension_array_shared_docs: dict[str, str] = {} ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray") @@ -190,7 +190,7 @@ class ExtensionArray: # ------------------------------------------------------------------------ @classmethod - def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): """ Construct a new ExtensionArray from a sequence of scalars. 
@@ -213,7 +213,7 @@ def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): @classmethod def _from_sequence_of_strings( - cls, strings, *, dtype: Optional[Dtype] = None, copy=False + cls, strings, *, dtype: Dtype | None = None, copy=False ): """ Construct a new ExtensionArray from a sequence of strings. @@ -261,8 +261,8 @@ def _from_factorized(cls, values, original): # ------------------------------------------------------------------------ def __getitem__( - self, item: Union[int, slice, np.ndarray] - ) -> Union[ExtensionArray, Any]: + self, item: int | slice | np.ndarray + ) -> ExtensionArray | Any: """ Select a subset of self. @@ -293,7 +293,7 @@ def __getitem__( """ raise AbstractMethodError(self) - def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None: + def __setitem__(self, key: int | np.ndarray, value: Any) -> None: """ Set one or more values inplace. @@ -395,7 +395,7 @@ def __ne__(self, other: Any) -> ArrayLike: def to_numpy( self, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy: bool = False, na_value=lib.no_default, ) -> np.ndarray: @@ -869,7 +869,7 @@ def isin(self, values) -> np.ndarray: """ return isin(np.asarray(self), values) - def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: + def _values_for_factorize(self) -> tuple[np.ndarray, Any]: """ Return an array and missing value suitable for factorization. @@ -893,7 +893,7 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: """ return self.astype(object), np.nan - def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: + def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]: """ Encode the extension array as an enumerated type. @@ -1101,7 +1101,7 @@ def copy(self: ExtensionArrayT) -> ExtensionArrayT: """ raise AbstractMethodError(self) - def view(self, dtype: Optional[Dtype] = None) -> ArrayLike: + def view(self, dtype: Dtype | None = None) -> ArrayLike: """ Return a view on the array. @@ -1139,7 +1139,7 @@ def __repr__(self) -> str: class_name = f"<{type(self).__name__}>\n" return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" - def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: + def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: """ Formatting function for scalar values. @@ -1205,7 +1205,7 @@ def ravel(self, order="C") -> ExtensionArray: @classmethod def _concat_same_type( - cls: Type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT] + cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT] ) -> ExtensionArrayT: """ Concatenate multiple array of this dtype. diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index dd281a39907fd..86d71c41886eb 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -60,7 +60,7 @@ class BooleanDtype(BaseMaskedDtype): # mypy: https://github.com/python/mypy/issues/4125 @property - def type(self) -> Type: # type: ignore[override] + def type(self) -> type: # type: ignore[override] return np.bool_ @property @@ -72,7 +72,7 @@ def numpy_dtype(self) -> np.dtype: return np.dtype("bool") @classmethod - def construct_array_type(cls) -> Type[BooleanArray]: + def construct_array_type(cls) -> type[BooleanArray]: """ Return the array type associated with this dtype. 
@@ -94,7 +94,7 @@ def _is_numeric(self) -> bool: return True def __from_arrow__( - self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] + self, array: pyarrow.Array | pyarrow.ChunkedArray ) -> BooleanArray: """ Construct BooleanArray from pyarrow Array/ChunkedArray. @@ -118,7 +118,7 @@ def __from_arrow__( def coerce_to_array( values, mask=None, copy: bool = False -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """ Coerce the input values array to numpy arrays with a mask. @@ -277,7 +277,7 @@ def dtype(self) -> BooleanDtype: @classmethod def _from_sequence( - cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False + cls, scalars, *, dtype: Dtype | None = None, copy: bool = False ) -> BooleanArray: if dtype: assert dtype == "boolean" @@ -287,12 +287,12 @@ def _from_sequence( @classmethod def _from_sequence_of_strings( cls, - strings: List[str], + strings: list[str], *, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy: bool = False, - true_values: Optional[List[str]] = None, - false_values: Optional[List[str]] = None, + true_values: list[str] | None = None, + false_values: list[str] | None = None, ) -> BooleanArray: true_values_union = cls._TRUE_VALUES.union(true_values or []) false_values_union = cls._FALSE_VALUES.union(false_values or []) @@ -357,7 +357,7 @@ def reconstruct(x): else: return reconstruct(result) - def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: + def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value) def astype(self, dtype, copy: bool = True) -> ArrayLike: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 48316373a1140..696100545e3c5 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -321,7 +321,7 @@ def __init__( values, categories=None, ordered=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, fastpath=False, copy: bool = True, ): @@ -422,11 +422,11 @@ def dtype(self) -> CategoricalDtype: return self._dtype @property - def _constructor(self) -> Type[Categorical]: + def _constructor(self) -> type[Categorical]: return Categorical @classmethod - def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): return Categorical(scalars, dtype=dtype, copy=copy) def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: @@ -486,7 +486,7 @@ def itemsize(self) -> int: """ return self.categories.itemsize - def tolist(self) -> List[Scalar]: + def tolist(self) -> list[Scalar]: """ Return a list of the values. @@ -562,7 +562,7 @@ def _from_inferred_categories( @classmethod def from_codes( - cls, codes, categories=None, ordered=None, dtype: Optional[Dtype] = None + cls, codes, categories=None, ordered=None, dtype: Dtype | None = None ): """ Make a Categorical type from codes and categories or dtype. @@ -1299,7 +1299,7 @@ def _validate_fill_value(self, fill_value): # ------------------------------------------------------------- - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: """ The numpy array interface. 
@@ -1933,7 +1933,7 @@ def _validate_setitem_value(self, value): codes = self.categories.get_indexer(rvalue) return codes.astype(self._ndarray.dtype, copy=False) - def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: + def _reverse_indexer(self) -> dict[Hashable, np.ndarray]: """ Compute the inverse of a categorical, returning a dict of categories -> indexers. @@ -2154,7 +2154,7 @@ def equals(self, other: object) -> bool: @classmethod def _concat_same_type( - cls: Type[CategoricalT], to_concat: Sequence[CategoricalT], axis: int = 0 + cls: type[CategoricalT], to_concat: Sequence[CategoricalT], axis: int = 0 ) -> CategoricalT: from pandas.core.dtypes.concat import union_categoricals diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1032559766ada..1c427305b5c0b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -103,25 +103,25 @@ class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray): """ # _infer_matches -> which infer_dtype strings are close enough to our own - _infer_matches: Tuple[str, ...] + _infer_matches: tuple[str, ...] _is_recognized_dtype: Callable[[DtypeObj], bool] - _recognized_scalars: Tuple[Type, ...] + _recognized_scalars: tuple[type, ...] _data: np.ndarray - def __init__(self, data, dtype: Optional[Dtype] = None, freq=None, copy=False): + def __init__(self, data, dtype: Dtype | None = None, freq=None, copy=False): raise AbstractMethodError(self) @classmethod def _simple_new( - cls: Type[DatetimeLikeArrayT], + cls: type[DatetimeLikeArrayT], values: np.ndarray, - freq: Optional[BaseOffset] = None, - dtype: Optional[Dtype] = None, + freq: BaseOffset | None = None, + dtype: Dtype | None = None, ) -> DatetimeLikeArrayT: raise AbstractMethodError(cls) @property - def _scalar_type(self) -> Type[DatetimeLikeScalar]: + def _scalar_type(self) -> type[DatetimeLikeScalar]: """ The scalar associated with this datelike @@ -153,7 +153,7 @@ def _scalar_from_string(self, value: str) -> DTScalarOrNaT: def _unbox_scalar( self, value: DTScalarOrNaT, setitem: bool = False - ) -> Union[np.int64, np.datetime64, np.timedelta64]: + ) -> np.int64 | np.datetime64 | np.timedelta64: """ Unbox the integer value of a scalar `value`. @@ -266,15 +266,15 @@ def _formatter(self, boxed=False): # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: # used for Timedelta/DatetimeArray, overwritten by PeriodArray if is_object_dtype(dtype): return np.array(list(self), dtype=object) return self._ndarray def __getitem__( - self, key: Union[int, slice, np.ndarray] - ) -> Union[DatetimeLikeArrayMixin, DTScalarOrNaT]: + self, key: int | slice | np.ndarray + ) -> DatetimeLikeArrayMixin | DTScalarOrNaT: """ This getitem defers to the underlying array, which by-definition can only handle list-likes, slices, and integer scalars @@ -315,8 +315,8 @@ def _get_getitem_freq(self, key): def __setitem__( self, - key: Union[int, Sequence[int], Sequence[bool], slice], - value: Union[NaTType, Any, Sequence[Any]], + key: int | Sequence[int] | Sequence[bool] | slice, + value: NaTType | Any | Sequence[Any], ) -> None: # I'm fudging the types a bit here. "Any" above really depends # on type(self). 
For PeriodArray, it's Period (or stuff coercible @@ -384,7 +384,7 @@ def astype(self, dtype, copy=True): else: return np.asarray(self, dtype=dtype) - def view(self, dtype: Optional[Dtype] = None): + def view(self, dtype: Dtype | None = None): if dtype is None or dtype is self.dtype: return type(self)(self._ndarray, dtype=self.dtype) return self._ndarray.view(dtype=dtype) @@ -394,7 +394,7 @@ def view(self, dtype: Optional[Dtype] = None): @classmethod def _concat_same_type( - cls: Type[DatetimeLikeArrayT], + cls: type[DatetimeLikeArrayT], to_concat: Sequence[DatetimeLikeArrayT], axis: int = 0, ) -> DatetimeLikeArrayT: @@ -429,7 +429,7 @@ def _values_for_factorize(self): @classmethod def _from_factorized( - cls: Type[DatetimeLikeArrayT], values, original + cls: type[DatetimeLikeArrayT], values, original ) -> DatetimeLikeArrayT: return cls(values, dtype=original.dtype) @@ -674,7 +674,7 @@ def _validate_setitem_value(self, value): def _unbox( self, other, setitem: bool = False - ) -> Union[np.int64, np.datetime64, np.timedelta64, np.ndarray]: + ) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray: """ Unbox either a scalar with _unbox_scalar or an instance of our own type. """ @@ -847,7 +847,7 @@ def inferred_freq(self): return None @property # NB: override with cache_readonly in immutable subclasses - def _resolution_obj(self) -> Optional[Resolution]: + def _resolution_obj(self) -> Resolution | None: try: return Resolution.get_reso_from_freq(self.freqstr) except KeyError: @@ -902,7 +902,7 @@ def _validate_frequency(cls, index, freq, **kwargs): @classmethod def _generate_range( - cls: Type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs + cls: type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs ) -> DatetimeLikeArrayT: raise AbstractMethodError(cls) @@ -1379,7 +1379,7 @@ def max(self, *, axis=None, skipna=True, **kwargs): result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def mean(self, *, skipna=True, axis: Optional[int] = 0): + def mean(self, *, skipna=True, axis: int | None = 0): """ Return the mean value of the Array. 
@@ -1418,7 +1418,7 @@ def mean(self, *, skipna=True, axis: Optional[int] = 0): ) return self._wrap_reduction_result(axis, result) - def median(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): + def median(self, *, axis: int | None = None, skipna: bool = True, **kwargs): nv.validate_median((), kwargs) if axis is not None and abs(axis) >= self.ndim: @@ -1625,11 +1625,11 @@ def ceil(self, freq, ambiguous="raise", nonexistent="raise"): # -------------------------------------------------------------- # Reductions - def any(self, *, axis: Optional[int] = None, skipna: bool = True): + def any(self, *, axis: int | None = None, skipna: bool = True): # GH#34479 discussion of desired behavior long-term return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) - def all(self, *, axis: Optional[int] = None, skipna: bool = True): + def all(self, *, axis: int | None = None, skipna: bool = True): # GH#34479 discussion of desired behavior long-term return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 144a7186f5826..d1f55fd862f94 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -215,7 +215,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # ----------------------------------------------------------------- # Constructors - _dtype: Union[np.dtype, DatetimeTZDtype] + _dtype: np.dtype | DatetimeTZDtype _freq = None def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): @@ -292,7 +292,7 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): @classmethod def _simple_new( - cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE + cls, values, freq: BaseOffset | None = None, dtype=DT64NS_DTYPE ) -> DatetimeArray: assert isinstance(values, np.ndarray) if values.dtype != DT64NS_DTYPE: @@ -484,11 +484,11 @@ def _check_compatible_with(self, other, setitem: bool = False): # ----------------------------------------------------------------- # Descriptive Properties - def _box_func(self, x) -> Union[Timestamp, NaTType]: + def _box_func(self, x) -> Timestamp | NaTType: return Timestamp(x, freq=self.freq, tz=self.tz) @property - def dtype(self) -> Union[np.dtype, DatetimeTZDtype]: + def dtype(self) -> np.dtype | DatetimeTZDtype: """ The dtype for the DatetimeArray. @@ -2175,8 +2175,8 @@ def maybe_convert_dtype(data, copy): def _maybe_infer_tz( - tz: Optional[tzinfo], inferred_tz: Optional[tzinfo] -) -> Optional[tzinfo]: + tz: tzinfo | None, inferred_tz: tzinfo | None +) -> tzinfo | None: """ If a timezone is inferred from data, check that it is compatible with the user-provided timezone, if any. @@ -2248,7 +2248,7 @@ def _validate_dt64_dtype(dtype): return dtype -def validate_tz_from_dtype(dtype, tz: Optional[tzinfo]) -> Optional[tzinfo]: +def validate_tz_from_dtype(dtype, tz: tzinfo | None) -> tzinfo | None: """ If the given dtype is a DatetimeTZDtype, extract the implied tzinfo object from it and check that it does not conflict with the given @@ -2296,8 +2296,8 @@ def validate_tz_from_dtype(dtype, tz: Optional[tzinfo]) -> Optional[tzinfo]: def _infer_tz_from_endpoints( - start: Timestamp, end: Timestamp, tz: Optional[tzinfo] -) -> Optional[tzinfo]: + start: Timestamp, end: Timestamp, tz: tzinfo | None +) -> tzinfo | None: """ If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints. 
If more than one @@ -2339,7 +2339,7 @@ def _infer_tz_from_endpoints( def _maybe_normalize_endpoints( - start: Optional[Timestamp], end: Optional[Timestamp], normalize: bool + start: Timestamp | None, end: Timestamp | None, normalize: bool ): _normalized = True diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 2c3b3d3c2f0b4..145d42c9b0b65 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -47,7 +47,7 @@ def _is_numeric(self) -> bool: return True @classmethod - def construct_array_type(cls) -> Type[FloatingArray]: + def construct_array_type(cls) -> type[FloatingArray]: """ Return the array type associated with this dtype. @@ -57,7 +57,7 @@ def construct_array_type(cls) -> Type[FloatingArray]: """ return FloatingArray - def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: # for now only handle other floating types if not all(isinstance(t, FloatingDtype) for t in dtypes): return None @@ -71,7 +71,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: def coerce_to_array( values, dtype=None, mask=None, copy: bool = False -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """ Coerce the input values array to numpy arrays with a mask. @@ -255,7 +255,7 @@ def _from_sequence_of_strings( scalars = to_numeric(strings, errors="raise") return cls._from_sequence(scalars, dtype=dtype, copy=copy) - def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: + def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) def astype(self, dtype, copy: bool = True) -> ArrayLike: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index ff1af80f81ac6..913d2a001dd23 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -57,7 +57,7 @@ def _is_numeric(self) -> bool: return True @classmethod - def construct_array_type(cls) -> Type[IntegerArray]: + def construct_array_type(cls) -> type[IntegerArray]: """ Return the array type associated with this dtype. 
@@ -67,7 +67,7 @@ def construct_array_type(cls) -> Type[IntegerArray]: """ return IntegerArray - def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: # we only handle nullable EA dtypes and numeric numpy dtypes if not all( isinstance(t, BaseMaskedDtype) @@ -112,7 +112,7 @@ def safe_cast(values, dtype, copy: bool): def coerce_to_array( values, dtype, mask=None, copy: bool = False -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """ Coerce the input values array to numpy arrays with a mask @@ -304,19 +304,19 @@ def __abs__(self): @classmethod def _from_sequence( - cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False + cls, scalars, *, dtype: Dtype | None = None, copy: bool = False ) -> IntegerArray: values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy) return IntegerArray(values, mask) @classmethod def _from_sequence_of_strings( - cls, strings, *, dtype: Optional[Dtype] = None, copy: bool = False + cls, strings, *, dtype: Dtype | None = None, copy: bool = False ) -> IntegerArray: scalars = to_numeric(strings, errors="raise") return cls._from_sequence(scalars, dtype=dtype, copy=copy) - def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: + def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) def astype(self, dtype, copy: bool = True) -> ArrayLike: @@ -543,7 +543,7 @@ class UInt64Dtype(_IntegerDtype): __doc__ = _dtype_docstring.format(dtype="uint64") -INT_STR_TO_DTYPE: Dict[str, _IntegerDtype] = { +INT_STR_TO_DTYPE: dict[str, _IntegerDtype] = { "int8": Int8Dtype(), "int16": Int16Dtype(), "int32": Int32Dtype(), diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 882ca0955bc99..5eca071917a98 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -172,7 +172,7 @@ def __new__( cls, data, closed=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy: bool = False, verify_integrity: bool = True, ): @@ -219,7 +219,7 @@ def _simple_new( right, closed=None, copy=False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, verify_integrity=True, ): result = IntervalMixin.__new__(cls) @@ -301,7 +301,7 @@ def _simple_new( @classmethod def _from_sequence( - cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False + cls, scalars, *, dtype: Dtype | None = None, copy: bool = False ): return cls(scalars, dtype=dtype, copy=copy) @@ -362,7 +362,7 @@ def _from_factorized(cls, values, original): } ) def from_breaks( - cls, breaks, closed="right", copy: bool = False, dtype: Optional[Dtype] = None + cls, breaks, closed="right", copy: bool = False, dtype: Dtype | None = None ): breaks = maybe_convert_platform_interval(breaks) @@ -433,7 +433,7 @@ def from_breaks( } ) def from_arrays( - cls, left, right, closed="right", copy=False, dtype: Optional[Dtype] = None + cls, left, right, closed="right", copy=False, dtype: Dtype | None = None ): left = maybe_convert_platform_interval(left) right = maybe_convert_platform_interval(right) @@ -492,7 +492,7 @@ def from_arrays( } ) def from_tuples( - cls, data, closed="right", copy=False, dtype: Optional[Dtype] = None + cls, data, closed="right", copy=False, dtype: Dtype | None = None ): if len(data): left, right = [], [] @@ -824,7 +824,7 @@ def equals(self, other) -> bool: @classmethod def _concat_same_type( - cls: Type[IntervalArrayT], to_concat: 
Sequence[IntervalArrayT] + cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT] ) -> IntervalArrayT: """ Concatenate multiple IntervalArray @@ -1314,7 +1314,7 @@ def is_non_overlapping_monotonic(self): # --------------------------------------------------------------------- # Conversion - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: """ Return the IntervalArray's data as a numpy array of Interval objects (with dtype='object') diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index a6ed75c65b2e9..44292a7311611 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -42,7 +42,7 @@ class BaseMaskedDtype(ExtensionDtype): name: str base = None - type: Type + type: type na_value = libmissing.NA @@ -61,7 +61,7 @@ def itemsize(self) -> int: return self.numpy_dtype.itemsize @classmethod - def construct_array_type(cls) -> Type[BaseMaskedArray]: + def construct_array_type(cls) -> type[BaseMaskedArray]: """ Return the array type associated with this dtype. @@ -106,8 +106,8 @@ def dtype(self) -> BaseMaskedDtype: raise AbstractMethodError(self) def __getitem__( - self, item: Union[int, slice, np.ndarray] - ) -> Union[BaseMaskedArray, Any]: + self, item: int | slice | np.ndarray + ) -> BaseMaskedArray | Any: if is_integer(item): if self._mask[item]: return self.dtype.na_value @@ -117,7 +117,7 @@ def __getitem__( return type(self)(self._data[item], self._mask[item]) - def _coerce_to_array(self, values) -> Tuple[np.ndarray, np.ndarray]: + def _coerce_to_array(self, values) -> tuple[np.ndarray, np.ndarray]: raise AbstractMethodError(self) def __setitem__(self, key, value) -> None: @@ -149,7 +149,7 @@ def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: def to_numpy( self, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, copy: bool = False, na_value: Scalar = lib.no_default, ) -> np.ndarray: @@ -261,7 +261,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: __array_priority__ = 1000 # higher than ndarray so ops dispatch to us - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: """ the array interface, return my values We return an object array here to preserve our scalar values @@ -296,7 +296,7 @@ def nbytes(self) -> int: @classmethod def _concat_same_type( - cls: Type[BaseMaskedArrayT], to_concat: Sequence[BaseMaskedArrayT] + cls: type[BaseMaskedArrayT], to_concat: Sequence[BaseMaskedArrayT] ) -> BaseMaskedArrayT: data = np.concatenate([x._data for x in to_concat]) mask = np.concatenate([x._mask for x in to_concat]) @@ -307,7 +307,7 @@ def take( indexer, *, allow_fill: bool = False, - fill_value: Optional[Scalar] = None, + fill_value: Scalar | None = None, ) -> BaseMaskedArrayT: # we always fill with 1 internally # to avoid upcasting @@ -349,7 +349,7 @@ def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT: return type(self)(data, mask, copy=False) @doc(ExtensionArray.factorize) - def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: + def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]: arr = self._data mask = self._mask diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 49f0d7e66c005..e004ba4f2946b 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -27,7 +27,7 @@ class NumericDtype(BaseMaskedDtype): def __from_arrow__( - 
self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] + self, array: pyarrow.Array | pyarrow.ChunkedArray ) -> BaseMaskedArray: """ Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray. @@ -158,7 +158,7 @@ def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): return result mask = np.zeros(len(self), dtype=bool) - inputs2: List[Any] = [] + inputs2: list[Any] = [] for x in inputs: if isinstance(x, NumericArray): mask |= x._mask diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 9999a9ed411d8..7ce1bf11fbe3a 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -60,7 +60,7 @@ class PandasArray( # ------------------------------------------------------------------------ # Constructors - def __init__(self, values: Union[np.ndarray, PandasArray], copy: bool = False): + def __init__(self, values: np.ndarray | PandasArray, copy: bool = False): if isinstance(values, type(self)): values = values._ndarray if not isinstance(values, np.ndarray): @@ -80,7 +80,7 @@ def __init__(self, values: Union[np.ndarray, PandasArray], copy: bool = False): @classmethod def _from_sequence( - cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False + cls, scalars, *, dtype: Dtype | None = None, copy: bool = False ) -> PandasArray: if isinstance(dtype, PandasDtype): dtype = dtype._dtype @@ -107,7 +107,7 @@ def dtype(self) -> PandasDtype: # ------------------------------------------------------------------------ # NumPy Array Interface - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: return np.asarray(self._ndarray, dtype=dtype) _HANDLED_TYPES = (np.ndarray, numbers.Number) @@ -174,7 +174,7 @@ def _validate_fill_value(self, fill_value): fill_value = self.dtype.na_value return fill_value - def _values_for_factorize(self) -> Tuple[np.ndarray, int]: + def _values_for_factorize(self) -> tuple[np.ndarray, int]: return self._ndarray, -1 # ------------------------------------------------------------------------ @@ -222,7 +222,7 @@ def mean( self, *, axis=None, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, out=None, keepdims=False, skipna=True, @@ -244,7 +244,7 @@ def std( self, *, axis=None, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, out=None, ddof=1, keepdims=False, @@ -260,7 +260,7 @@ def var( self, *, axis=None, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, out=None, ddof=1, keepdims=False, @@ -276,7 +276,7 @@ def sem( self, *, axis=None, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, out=None, ddof=1, keepdims=False, @@ -292,7 +292,7 @@ def kurt( self, *, axis=None, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, out=None, keepdims=False, skipna=True, @@ -307,7 +307,7 @@ def skew( self, *, axis=None, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, out=None, keepdims=False, skipna=True, @@ -323,7 +323,7 @@ def skew( def to_numpy( self, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, copy: bool = False, na_value=lib.no_default, ) -> np.ndarray: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index e0149f27ad6a6..7e4fa25c8bc13 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -135,7 +135,7 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps): _infer_matches = ("period",) # Names others delegate to us - _other_ops: List[str] = [] + _other_ops: list[str] = [] 
_bool_ops = ["is_leap_year"] _object_ops = ["start_time", "end_time", "freq"] _field_ops = [ @@ -163,7 +163,7 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps): # -------------------------------------------------------------------- # Constructors - def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False): + def __init__(self, values, dtype: Dtype | None = None, freq=None, copy=False): freq = validate_dtype_freq(dtype, freq) if freq is not None: @@ -192,8 +192,8 @@ def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False) def _simple_new( cls, values: np.ndarray, - freq: Optional[BaseOffset] = None, - dtype: Optional[Dtype] = None, + freq: BaseOffset | None = None, + dtype: Dtype | None = None, ) -> PeriodArray: # alias for PeriodArray.__init__ assertion_msg = "Should be numpy array of type i8" @@ -202,10 +202,10 @@ def _simple_new( @classmethod def _from_sequence( - cls: Type[PeriodArray], - scalars: Union[Sequence[Optional[Period]], AnyArrayLike], + cls: type[PeriodArray], + scalars: Sequence[Period | None] | AnyArrayLike, *, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy: bool = False, ) -> PeriodArray: if dtype and isinstance(dtype, PeriodDtype): @@ -227,7 +227,7 @@ def _from_sequence( @classmethod def _from_sequence_of_strings( - cls, strings, *, dtype: Optional[Dtype] = None, copy=False + cls, strings, *, dtype: Dtype | None = None, copy=False ) -> PeriodArray: return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -274,7 +274,7 @@ def _generate_range(cls, start, end, periods, freq, fields): # DatetimeLike Interface def _unbox_scalar( - self, value: Union[Period, NaTType], setitem: bool = False + self, value: Period | NaTType, setitem: bool = False ) -> np.int64: if value is NaT: return np.int64(value.value) @@ -307,7 +307,7 @@ def freq(self) -> BaseOffset: """ return self.dtype.freq - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: if dtype == "i8": return self.asi8 elif dtype == bool: @@ -494,7 +494,7 @@ def _time_shift(self, periods, freq=None): values[self._isnan] = iNaT return type(self)(values, freq=self.freq) - def _box_func(self, x) -> Union[Period, NaTType]: + def _box_func(self, x) -> Period | NaTType: return Period._from_ordinal(ordinal=x, freq=self.freq) @doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex") @@ -830,8 +830,8 @@ def raise_on_incompatible(left, right): def period_array( - data: Union[Sequence[Optional[Period]], AnyArrayLike], - freq: Optional[Union[str, Tick]] = None, + data: Sequence[Period | None] | AnyArrayLike, + freq: str | Tick | None = None, copy: bool = False, ) -> PeriodArray: """ @@ -900,7 +900,7 @@ def period_array( arrdata = np.asarray(data) - dtype: Optional[PeriodDtype] + dtype: PeriodDtype | None if freq: dtype = PeriodDtype(freq) else: diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 2c69096e56973..cbdd3569e2042 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -176,7 +176,7 @@ def _sparse_array_op( return _wrap_result(name, result, index, fill, dtype=result_dtype) -def _wrap_result(name, data, sparse_index, fill_value, dtype: Optional[Dtype] = None): +def _wrap_result(name, data, sparse_index, fill_value, dtype: Dtype | None = None): """ wrap op result to have correct dtype """ @@ -283,7 +283,7 @@ def __init__( index=None, fill_value=None, kind="integer", - dtype: Optional[Dtype] 
= None, + dtype: Dtype | None = None, copy=False, ): @@ -399,7 +399,7 @@ def __init__( @classmethod def _simple_new( - cls: Type[SparseArrayT], + cls: type[SparseArrayT], sparse_array: np.ndarray, sparse_index: SparseIndex, dtype: SparseDtype, @@ -456,7 +456,7 @@ def from_spmatrix(cls, data): return cls._simple_new(arr, index, dtype) - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: fill_value = self.fill_value if self.sp_index.ngaps == 0: @@ -489,7 +489,7 @@ def __setitem__(self, key, value): raise TypeError(msg) @classmethod - def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): return cls(scalars, dtype=dtype) @classmethod @@ -896,7 +896,7 @@ def _take_with_fill(self, indices, fill_value=None) -> np.ndarray: return taken - def _take_without_fill(self, indices) -> Union[np.ndarray, SparseArray]: + def _take_without_fill(self, indices) -> np.ndarray | SparseArray: to_shift = indices < 0 indices = indices.copy() @@ -948,7 +948,7 @@ def copy(self: SparseArrayT) -> SparseArrayT: @classmethod def _concat_same_type( - cls: Type[SparseArrayT], to_concat: Sequence[SparseArrayT] + cls: type[SparseArrayT], to_concat: Sequence[SparseArrayT] ) -> SparseArrayT: fill_value = to_concat[0].fill_value @@ -1000,7 +1000,7 @@ def _concat_same_type( return cls(data, sparse_index=sp_index, fill_value=fill_value) - def astype(self, dtype: Optional[Dtype] = None, copy=True): + def astype(self, dtype: Dtype | None = None, copy=True): """ Change the dtype of a SparseArray. @@ -1464,7 +1464,7 @@ def _formatter(self, boxed=False): def make_sparse( - arr: np.ndarray, kind="block", fill_value=None, dtype: Optional[NpDtype] = None + arr: np.ndarray, kind="block", fill_value=None, dtype: NpDtype | None = None ): """ Convert ndarray to sparse format diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 4c1c1b42ff6fa..b29042870ddc6 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -173,7 +173,7 @@ def __repr__(self) -> str: return self.name @classmethod - def construct_array_type(cls) -> Type[SparseArray]: + def construct_array_type(cls) -> type[SparseArray]: """ Return the array type associated with this dtype. @@ -238,7 +238,7 @@ def construct_from_string(cls, string: str) -> SparseDtype: raise TypeError(msg) @staticmethod - def _parse_subtype(dtype: str) -> Tuple[str, bool]: + def _parse_subtype(dtype: str) -> tuple[str, bool]: """ Parse a string to get the subtype @@ -359,7 +359,7 @@ def _subtype_with_str(self): return type(self.fill_value) return self.subtype - def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: # TODO for now only handle SparseDtypes and numpy dtypes => extend # with other compatibtle extension dtypes if any( diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 2e4580207bc8a..9b8604e8699e3 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -67,11 +67,11 @@ class StringDtype(ExtensionDtype): na_value = libmissing.NA @property - def type(self) -> Type[str]: + def type(self) -> type[str]: return str @classmethod - def construct_array_type(cls) -> Type[StringArray]: + def construct_array_type(cls) -> type[StringArray]: """ Return the array type associated with this dtype. 
@@ -85,7 +85,7 @@ def __repr__(self) -> str: return "StringDtype" def __from_arrow__( - self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] + self, array: pyarrow.Array | pyarrow.ChunkedArray ) -> StringArray: """ Construct StringArray from pyarrow Array/ChunkedArray. @@ -208,7 +208,7 @@ def _validate(self): ) @classmethod - def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): if dtype: assert dtype == "string" @@ -237,7 +237,7 @@ def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): @classmethod def _from_sequence_of_strings( - cls, strings, *, dtype: Optional[Dtype] = None, copy=False + cls, strings, *, dtype: Dtype | None = None, copy=False ): return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -385,7 +385,7 @@ def _cmp_method(self, other, op): # String methods interface _str_na_value = StringDtype.na_value - def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None): + def _str_map(self, f, na_value=None, dtype: Dtype | None = None): from pandas.arrays import BooleanArray, IntegerArray, StringArray from pandas.core.arrays.string_ import StringDtype @@ -398,7 +398,7 @@ def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None): arr = np.asarray(self) if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: Union[Type[IntegerArray], Type[BooleanArray]] + constructor: type[IntegerArray] | type[BooleanArray] if is_integer_dtype(dtype): constructor = IntegerArray else: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 252e9a84022db..164bf37b2b73d 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -83,11 +83,11 @@ class ArrowStringDtype(ExtensionDtype): na_value = libmissing.NA @property - def type(self) -> Type[str]: + def type(self) -> type[str]: return str @classmethod - def construct_array_type(cls) -> Type[ArrowStringArray]: + def construct_array_type(cls) -> type[ArrowStringArray]: """ Return the array type associated with this dtype. @@ -104,7 +104,7 @@ def __repr__(self) -> str: return "ArrowStringDtype" def __from_arrow__( - self, array: Union[pa.Array, pa.ChunkedArray] + self, array: pa.Array | pa.ChunkedArray ) -> ArrowStringArray: """ Construct StringArray from pyarrow Array/ChunkedArray. 
@@ -203,7 +203,7 @@ def _chk_pyarrow_available(cls) -> None: raise ImportError(msg) @classmethod - def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy=False): cls._chk_pyarrow_available() # convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value scalars = lib.ensure_string_array(scalars, copy=False) @@ -211,7 +211,7 @@ def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): @classmethod def _from_sequence_of_strings( - cls, strings, dtype: Optional[Dtype] = None, copy=False + cls, strings, dtype: Dtype | None = None, copy=False ): return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -222,7 +222,7 @@ def dtype(self) -> ArrowStringDtype: """ return self._dtype - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: """Correctly construct numpy arrays when passed to `np.asarray()`.""" return self.to_numpy(dtype=dtype) @@ -232,7 +232,7 @@ def __arrow_array__(self, type=None): def to_numpy( self, - dtype: Optional[NpDtype] = None, + dtype: NpDtype | None = None, copy: bool = False, na_value=lib.no_default, ) -> np.ndarray: @@ -441,7 +441,7 @@ def _cmp_method(self, other, op): # TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray return BooleanArray._from_sequence(result.to_pandas().values) - def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None: + def __setitem__(self, key: int | np.ndarray, value: Any) -> None: """Set one or more values inplace. Parameters diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index e9160c92435a4..69c34c128500f 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -111,8 +111,8 @@ class TimedeltaArray(dtl.TimelikeOps): __array_priority__ = 1000 # define my properties & methods for delegation - _other_ops: List[str] = [] - _bool_ops: List[str] = [] + _other_ops: list[str] = [] + _bool_ops: list[str] = [] _object_ops = ["freq"] _field_ops = ["days", "seconds", "microseconds", "nanoseconds"] _datetimelike_ops = _field_ops + _object_ops + _bool_ops @@ -127,7 +127,7 @@ class TimedeltaArray(dtl.TimelikeOps): # Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray) # operates pointwise. 
-    def _box_func(self, x) -> Union[Timedelta, NaTType]:
+    def _box_func(self, x) -> Timedelta | NaTType:
         return Timedelta(x, unit="ns")
 
     @property
@@ -207,7 +207,7 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False):
 
     @classmethod
     def _simple_new(
-        cls, values, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE
+        cls, values, freq: BaseOffset | None = None, dtype=TD64NS_DTYPE
     ) -> TimedeltaArray:
         assert dtype == TD64NS_DTYPE, dtype
         assert isinstance(values, np.ndarray), type(values)
@@ -355,7 +355,7 @@ def sum(
         self,
         *,
         axis=None,
-        dtype: Optional[NpDtype] = None,
+        dtype: NpDtype | None = None,
         out=None,
         keepdims: bool = False,
         initial=None,
@@ -375,7 +375,7 @@ def std(
         self,
         *,
         axis=None,
-        dtype: Optional[NpDtype] = None,
+        dtype: NpDtype | None = None,
         out=None,
         ddof: int = 1,
         keepdims: bool = False,
diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py
index 5ad3e78a76866..696cbafd6f81a 100644
--- a/pandas/core/computation/align.py
+++ b/pandas/core/computation/align.py
@@ -24,10 +24,10 @@ def _align_core_single_unary_op(
     term,
-) -> Tuple[Union[partial, Type[FrameOrSeries]], Optional[Dict[str, Index]]]:
+) -> tuple[partial | type[FrameOrSeries], dict[str, Index] | None]:
 
-    typ: Union[partial, Type[FrameOrSeries]]
-    axes: Optional[Dict[str, Index]] = None
+    typ: partial | type[FrameOrSeries]
+    axes: dict[str, Index] | None = None
 
     if isinstance(term.value, np.ndarray):
         typ = partial(np.asanyarray, dtype=term.value.dtype)
@@ -40,8 +40,8 @@ def _align_core_single_unary_op(
 
 
 def _zip_axes_from_type(
-    typ: Type[FrameOrSeries], new_axes: Sequence[Index]
-) -> Dict[str, Index]:
+    typ: type[FrameOrSeries], new_axes: Sequence[Index]
+) -> dict[str, Index]:
     return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)}
 
 
diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py
index 7b42b21cadc1f..4b8da87434ebe 100644
--- a/pandas/core/computation/ops.py
+++ b/pandas/core/computation/ops.py
@@ -59,7 +59,7 @@ class UndefinedVariableError(NameError):
     NameError subclass for local variables.
""" - def __init__(self, name: str, is_local: Optional[bool] = None): + def __init__(self, name: str, is_local: bool | None = None): base_msg = f"{repr(name)} is not defined" if is_local: msg = f"local variable {base_msg}" @@ -205,7 +205,7 @@ class Op: op: str - def __init__(self, op: str, operands: Iterable[Union[Term, Op]], encoding=None): + def __init__(self, op: str, operands: Iterable[Term | Op], encoding=None): self.op = _bool_op_map.get(op, op) self.operands = operands self.encoding = encoding diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 6a3b95186d666..f3eeb073f3f42 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -26,14 +26,14 @@ class PyTablesScope(_scope.Scope): __slots__ = ("queryables",) - queryables: Dict[str, Any] + queryables: dict[str, Any] def __init__( self, level: int, global_dict=None, local_dict=None, - queryables: Optional[Dict[str, Any]] = None, + queryables: dict[str, Any] | None = None, ): super().__init__(level + 1, global_dict=global_dict, local_dict=local_dict) self.queryables = queryables or {} @@ -86,10 +86,10 @@ class BinOp(ops.BinOp): _max_selectors = 31 op: str - queryables: Dict[str, Any] - condition: Optional[str] + queryables: dict[str, Any] + condition: str | None - def __init__(self, op: str, lhs, rhs, queryables: Dict[str, Any], encoding): + def __init__(self, op: str, lhs, rhs, queryables: dict[str, Any], encoding): super().__init__(op, lhs, rhs) self.queryables = queryables self.encoding = encoding @@ -248,7 +248,7 @@ def convert_values(self): class FilterBinOp(BinOp): - filter: Optional[Tuple[Any, Any, Index]] = None + filter: tuple[Any, Any, Index] | None = None def __repr__(self) -> str: if self.filter is None: @@ -527,13 +527,13 @@ class PyTablesExpr(expr.Expr): "major_axis>=20130101" """ - _visitor: Optional[PyTablesExprVisitor] + _visitor: PyTablesExprVisitor | None env: PyTablesScope def __init__( self, where, - queryables: Optional[Dict[str, Any]] = None, + queryables: dict[str, Any] | None = None, encoding=None, scope_level: int = 0, ): diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index c2ba7f9892ef0..05df1e3005a4c 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -244,7 +244,7 @@ def swapkey(self, old_key: str, new_key: str, new_value=None): mapping[new_key] = new_value # type: ignore[index] return - def _get_vars(self, stack, scopes: List[str]): + def _get_vars(self, stack, scopes: list[str]): """ Get specifically scoped variables from a list of stack frames. diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 640c8d66807ad..595117e386971 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -53,8 +53,8 @@ def array( - data: Union[Sequence[object], AnyArrayLike], - dtype: Optional[Dtype] = None, + data: Sequence[object] | AnyArrayLike, + dtype: Dtype | None = None, copy: bool = True, ) -> ExtensionArray: """ @@ -346,7 +346,7 @@ def array( return PandasArray._from_sequence(data, dtype=dtype, copy=copy) -def extract_array(obj: object, extract_numpy: bool = False) -> Union[Any, ArrayLike]: +def extract_array(obj: object, extract_numpy: bool = False) -> Any | ArrayLike: """ Extract the ndarray or ExtensionArray from a Series or Index. 
@@ -431,8 +431,8 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
 
 def sanitize_array(
     data,
-    index: Optional[Index],
-    dtype: Optional[DtypeObj] = None,
+    index: Index | None,
+    dtype: DtypeObj | None = None,
     copy: bool = False,
     raise_cast_failure: bool = False,
 ) -> ArrayLike:
@@ -516,7 +516,7 @@ def sanitize_array(
 
 
 def _sanitize_ndim(
-    result: ArrayLike, data, dtype: Optional[DtypeObj], index: Optional[Index]
+    result: ArrayLike, data, dtype: DtypeObj | None, index: Index | None
 ) -> ArrayLike:
     """
     Ensure we have a 1-dimensional result array.
@@ -542,7 +542,7 @@ def _sanitize_ndim(
 
 
 def _sanitize_str_dtypes(
-    result: np.ndarray, data, dtype: Optional[DtypeObj], copy: bool
+    result: np.ndarray, data, dtype: DtypeObj | None, copy: bool
 ) -> np.ndarray:
     """
     Ensure we have a dtype that is supported by pandas.
@@ -561,7 +561,7 @@ def _sanitize_str_dtypes(
     return result
 
 
-def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike:
+def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
     """
     If we have a length-1 array and an index describing how long we expect
     the result to be, repeat the array.
@@ -572,7 +572,7 @@ def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike:
     return arr
 
 
-def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool):
+def _try_cast(arr, dtype: DtypeObj | None, copy: bool, raise_cast_failure: bool):
     """
     Convert input to numpy ndarray and optionally cast to a given dtype.
 
@@ -652,9 +652,9 @@ def is_empty_data(data: Any) -> bool:
 
 def create_series_with_explicit_dtype(
     data: Any = None,
-    index: Optional[Union[ArrayLike, Index]] = None,
-    dtype: Optional[Dtype] = None,
-    name: Optional[str] = None,
+    index: ArrayLike | Index | None = None,
+    dtype: Dtype | None = None,
+    name: str | None = None,
     copy: bool = False,
     fastpath: bool = False,
     dtype_if_empty: Dtype = object,
diff --git a/pandas/core/describe.py b/pandas/core/describe.py
index dcafb3c3a8be5..0cf6387af4648 100644
--- a/pandas/core/describe.py
+++ b/pandas/core/describe.py
@@ -33,10 +33,10 @@ def describe_ndframe(
     *,
     obj: FrameOrSeries,
-    include: Optional[Union[str, Sequence[str]]],
-    exclude: Optional[Union[str, Sequence[str]]],
+    include: str | Sequence[str] | None,
+    exclude: str | Sequence[str] | None,
     datetime_is_numeric: bool,
-    percentiles: Optional[Sequence[float]],
+    percentiles: Sequence[float] | None,
 ) -> FrameOrSeries:
     """Describe series or dataframe.
@@ -139,8 +139,8 @@ def __init__(
         self,
         obj: DataFrame,
         *,
-        include: Optional[Union[str, Sequence[str]]],
-        exclude: Optional[Union[str, Sequence[str]]],
+        include: str | Sequence[str] | None,
+        exclude: str | Sequence[str] | None,
         datetime_is_numeric: bool,
     ):
         self.include = include
@@ -154,7 +154,7 @@ def __init__(
     def describe(self, percentiles: Sequence[float]) -> DataFrame:
         data = self._select_data()
 
-        ldesc: List[Series] = []
+        ldesc: list[Series] = []
         for _, series in data.items():
             describe_func = select_describe_func(series, self.datetime_is_numeric)
             ldesc.append(describe_func(series, percentiles))
@@ -191,9 +191,9 @@ def _select_data(self):
         return data
 
 
-def reorder_columns(ldesc: Sequence[Series]) -> List[Hashable]:
+def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]:
     """Set a convenient order for rows for display."""
-    names: List[Hashable] = []
+    names: list[Hashable] = []
     ldesc_indexes = sorted((x.index for x in ldesc), key=len)
     for idxnames in ldesc_indexes:
         for name in idxnames:
@@ -365,7 +365,7 @@ def select_describe_func(
     return describe_categorical_1d
 
 
-def refine_percentiles(percentiles: Optional[Sequence[float]]) -> Sequence[float]:
+def refine_percentiles(percentiles: Sequence[float] | None) -> Sequence[float]:
     """Ensure that percentiles are unique and sorted.
 
     Parameters
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 887bbc052b5c9..8b62ad088f0b5 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -89,7 +89,7 @@ def __from_arrow__(
     provided for registering virtual subclasses.
     """
 
-    _metadata: Tuple[str, ...] = ()
+    _metadata: tuple[str, ...] = ()
 
     def __str__(self) -> str:
         return self.name
@@ -141,7 +141,7 @@ def na_value(self) -> object:
         return np.nan
 
     @property
-    def type(self) -> Type[Any]:
+    def type(self) -> type[Any]:
         """
         The scalar type for the array, e.g. ``int``
 
@@ -178,7 +178,7 @@ def name(self) -> str:
         raise AbstractMethodError(self)
 
     @property
-    def names(self) -> Optional[List[str]]:
+    def names(self) -> list[str] | None:
         """
         Ordered list of field names, or None if there are no fields.
 
@@ -188,7 +188,7 @@ def names(self) -> Optional[List[str]]:
         return None
 
     @classmethod
-    def construct_array_type(cls) -> Type[ExtensionArray]:
+    def construct_array_type(cls) -> type[ExtensionArray]:
         """
         Return the array type associated with this dtype.
 
@@ -325,7 +325,7 @@ def _is_boolean(self) -> bool:
         """
         return False
 
-    def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
         """
         Return the common dtype, if one exists.
 
@@ -354,7 +354,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         return None
 
 
-def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]:
+def register_extension_dtype(cls: type[ExtensionDtype]) -> type[ExtensionDtype]:
     """
     Register an ExtensionType with pandas as class decorator.
@@ -397,9 +397,9 @@ class Registry:
     """
 
     def __init__(self):
-        self.dtypes: List[Type[ExtensionDtype]] = []
+        self.dtypes: list[type[ExtensionDtype]] = []
 
-    def register(self, dtype: Type[ExtensionDtype]) -> None:
+    def register(self, dtype: type[ExtensionDtype]) -> None:
         """
         Parameters
         ----------
@@ -411,8 +411,8 @@ def register(self, dtype: Type[ExtensionDtype]) -> None:
         self.dtypes.append(dtype)
 
     def find(
-        self, dtype: Union[Type[ExtensionDtype], str]
-    ) -> Optional[Type[ExtensionDtype]]:
+        self, dtype: type[ExtensionDtype] | str
+    ) -> type[ExtensionDtype] | None:
         """
         Parameters
         ----------
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 0be3970159fbd..edf379fdb51ac 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -126,7 +126,7 @@ def is_nested_object(obj) -> bool:
     )
 
 
-def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar:
+def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar:
     """
     Cast scalar to Timestamp or Timedelta if scalar is datetime-like
     and dtype is not object.
@@ -189,7 +189,7 @@ def _disallow_mismatched_datetimelike(value, dtype: DtypeObj):
         raise TypeError(f"Cannot cast {repr(value)} to {dtype}")
 
 
-def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
+def maybe_downcast_to_dtype(result, dtype: str | np.dtype):
     """
     try to cast to the specified dtype (e.g. convert back to bool/int
     or could be an astype of float64->float32
@@ -398,7 +398,7 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
 
 
 def maybe_cast_to_extension_array(
-    cls: Type[ExtensionArray], obj: ArrayLike, dtype: Optional[ExtensionDtype] = None
+    cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None
 ) -> ArrayLike:
     """
     Call to `_from_sequence` that returns the object unchanged on Exception.
@@ -679,7 +679,7 @@ def _ensure_dtype_type(value, dtype: DtypeObj):
     return dtype.type(value)
 
 
-def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
+def infer_dtype_from(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
     """
     Interpret the dtype from a scalar or array.
 
@@ -696,7 +696,7 @@ def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
     return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
 
 
-def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
+def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
     """
     Interpret the dtype from a scalar.
 
@@ -785,7 +785,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
     return dtype, val
 
 
-def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]:
+def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
     """
     Convert datetimelike-keyed dicts to a Timestamp-keyed dict.
 
@@ -803,7 +803,7 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]:
 
 def infer_dtype_from_array(
     arr, pandas_dtype: bool = False
-) -> Tuple[DtypeObj, ArrayLike]:
+) -> tuple[DtypeObj, ArrayLike]:
     """
     Infer the dtype from an array.
 
@@ -895,7 +895,7 @@ def maybe_upcast(
     values: np.ndarray,
     fill_value: Scalar = np.nan,
     copy: bool = False,
-) -> Tuple[np.ndarray, Scalar]:
+) -> tuple[np.ndarray, Scalar]:
     """
     Provide explicit type promotion and coercion.
@@ -921,7 +921,7 @@ def maybe_upcast(
     return values, fill_value
 
 
-def invalidate_string_dtypes(dtype_set: Set[DtypeObj]):
+def invalidate_string_dtypes(dtype_set: set[DtypeObj]):
     """
     Change string like dtypes to object for
     ``DataFrame.select_dtypes()``.
@@ -1348,7 +1348,7 @@ def maybe_castable(arr: np.ndarray) -> bool:
 
 
 def maybe_infer_to_datetimelike(
-    value: Union[ArrayLike, Scalar], convert_dates: bool = False
+    value: ArrayLike | Scalar, convert_dates: bool = False
 ):
     """
     we might have a array (or single object) that is datetime like,
@@ -1453,7 +1453,7 @@ def try_timedelta(v):
     return value
 
 
-def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
+def maybe_cast_to_datetime(value, dtype: DtypeObj | None):
     """
     try to cast the array/value to a datetimelike dtype,
     converting float nan to iNaT
@@ -1594,7 +1594,7 @@ def sanitize_to_nanoseconds(values: np.ndarray) -> np.ndarray:
     return values
 
 
-def find_common_type(types: List[DtypeObj]) -> DtypeObj:
+def find_common_type(types: list[DtypeObj]) -> DtypeObj:
     """
     Find a common data type among the given dtypes.
 
@@ -1679,7 +1679,7 @@ def construct_2d_arraylike_from_scalar(
 
 
 def construct_1d_arraylike_from_scalar(
-    value: Scalar, length: int, dtype: Optional[DtypeObj]
+    value: Scalar, length: int, dtype: DtypeObj | None
 ) -> ArrayLike:
     """
     create a np.ndarray / pandas type of specified shape and dtype
@@ -1753,7 +1753,7 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
 
 
 def construct_1d_ndarray_preserving_na(
-    values: Sequence, dtype: Optional[DtypeObj] = None, copy: bool = False
+    values: Sequence, dtype: DtypeObj | None = None, copy: bool = False
 ) -> np.ndarray:
     """
     Construct a new ndarray, coercing `values` to `dtype`, preserving NA.
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index deafc17f76e10..108b79c930d24 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -67,12 +67,12 @@ class PandasExtensionDtype(ExtensionDtype):
     subdtype = None
     str: str_type
     num = 100
-    shape: Tuple[int, ...] = ()
+    shape: tuple[int, ...] = ()
     itemsize = 8
     base = None
     isbuiltin = 0
     isnative = 0
-    _cache: Dict[str_type, PandasExtensionDtype] = {}
+    _cache: dict[str_type, PandasExtensionDtype] = {}
 
     def __str__(self) -> str_type:
         """
@@ -89,7 +89,7 @@ def __repr__(self) -> str_type:
     def __hash__(self) -> int:
         raise NotImplementedError("sub-classes should implement an __hash__ method")
 
-    def __getstate__(self) -> Dict[str_type, Any]:
+    def __getstate__(self) -> dict[str_type, Any]:
         # pickle support; we don't want to pickle the cache
         return {k: getattr(self, k, None) for k in self._metadata}
 
@@ -163,19 +163,19 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
     # TODO: Document public vs. private API
     name = "category"
-    type: Type[CategoricalDtypeType] = CategoricalDtypeType
+    type: type[CategoricalDtypeType] = CategoricalDtypeType
     kind: str_type = "O"
     str = "|O08"
     base = np.dtype("O")
     _metadata = ("categories", "ordered")
-    _cache: Dict[str_type, PandasExtensionDtype] = {}
+    _cache: dict[str_type, PandasExtensionDtype] = {}
 
     def __init__(self, categories=None, ordered: Ordered = False):
         self._finalize(categories, ordered, fastpath=False)
 
     @classmethod
     def _from_fastpath(
-        cls, categories=None, ordered: Optional[bool] = None
+        cls, categories=None, ordered: bool | None = None
     ) -> CategoricalDtype:
         self = cls.__new__(cls)
         self._finalize(categories, ordered, fastpath=True)
@@ -198,8 +198,8 @@ def _from_values_or_dtype(
         cls,
         values=None,
         categories=None,
-        ordered: Optional[bool] = None,
-        dtype: Optional[Dtype] = None,
+        ordered: bool | None = None,
+        dtype: Dtype | None = None,
     ) -> CategoricalDtype:
         """
         Construct dtype from the input parameters used in :class:`Categorical`.
@@ -458,7 +458,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
             return np.bitwise_xor.reduce(hashed)
 
     @classmethod
-    def construct_array_type(cls) -> Type[Categorical]:
+    def construct_array_type(cls) -> type[Categorical]:
         """
         Return the array type associated with this dtype.
 
@@ -527,7 +527,7 @@ def validate_categories(categories, fastpath: bool = False):
         return categories
 
     def update_dtype(
-        self, dtype: Union[str_type, CategoricalDtype]
+        self, dtype: str_type | CategoricalDtype
     ) -> CategoricalDtype:
         """
         Returns a CategoricalDtype with categories and ordered taken from dtype
@@ -581,7 +581,7 @@ def _is_boolean(self) -> bool:
         return is_bool_dtype(self.categories)
 
-    def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
         from pandas.core.arrays.sparse import SparseDtype
 
         # check if we have all categorical dtype with identical categories
@@ -650,7 +650,7 @@ class DatetimeTZDtype(PandasExtensionDtype):
     datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
     """
 
-    type: Type[Timestamp] = Timestamp
+    type: type[Timestamp] = Timestamp
     kind: str_type = "M"
     str = "|M8[ns]"
     num = 101
@@ -658,9 +658,9 @@ class DatetimeTZDtype(PandasExtensionDtype):
     na_value = NaT
     _metadata = ("unit", "tz")
     _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
-    _cache: Dict[str_type, PandasExtensionDtype] = {}
+    _cache: dict[str_type, PandasExtensionDtype] = {}
 
-    def __init__(self, unit: Union[str_type, DatetimeTZDtype] = "ns", tz=None):
+    def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None):
         if isinstance(unit, DatetimeTZDtype):
             # error: "str" has no attribute "tz"
             unit, tz = unit.unit, unit.tz  # type: ignore[attr-defined]
@@ -707,7 +707,7 @@ def tz(self):
         return self._tz
 
     @classmethod
-    def construct_array_type(cls) -> Type[DatetimeArray]:
+    def construct_array_type(cls) -> type[DatetimeArray]:
         """
         Return the array type associated with this dtype.
@@ -817,14 +817,14 @@ class PeriodDtype(dtypes.PeriodDtypeBase, PandasExtensionDtype):
     period[M]
     """
 
-    type: Type[Period] = Period
+    type: type[Period] = Period
     kind: str_type = "O"
     str = "|O08"
     base = np.dtype("O")
     num = 102
     _metadata = ("freq",)
     _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
-    _cache: Dict[str_type, PandasExtensionDtype] = {}
+    _cache: dict[str_type, PandasExtensionDtype] = {}
 
     def __new__(cls, freq=None):
         """
@@ -953,7 +953,7 @@ def is_dtype(cls, dtype: object) -> bool:
         return super().is_dtype(dtype)
 
     @classmethod
-    def construct_array_type(cls) -> Type[PeriodArray]:
+    def construct_array_type(cls) -> type[PeriodArray]:
         """
         Return the array type associated with this dtype.
 
@@ -966,7 +966,7 @@ def construct_array_type(cls) -> Type[PeriodArray]:
         return PeriodArray
 
     def __from_arrow__(
-        self, array: Union[pyarrow.Array, pyarrow.ChunkedArray]
+        self, array: pyarrow.Array | pyarrow.ChunkedArray
     ) -> PeriodArray:
         """
         Construct PeriodArray from pyarrow Array/ChunkedArray.
@@ -1029,9 +1029,9 @@ class IntervalDtype(PandasExtensionDtype):
     _match = re.compile(
         r"(I|i)nterval\[(?P<subtype>[^,]+)(, (?P<closed>(right|left|both|neither)))?\]"
     )
-    _cache: Dict[str_type, PandasExtensionDtype] = {}
+    _cache: dict[str_type, PandasExtensionDtype] = {}
 
-    def __new__(cls, subtype=None, closed: Optional[str_type] = None):
+    def __new__(cls, subtype=None, closed: str_type | None = None):
         from pandas.core.dtypes.common import is_string_dtype, pandas_dtype
 
         if closed is not None and closed not in {"right", "left", "both", "neither"}:
@@ -1103,7 +1103,7 @@ def subtype(self):
         return self._subtype
 
     @classmethod
-    def construct_array_type(cls) -> Type[IntervalArray]:
+    def construct_array_type(cls) -> type[IntervalArray]:
         """
         Return the array type associated with this dtype.
 
@@ -1197,7 +1197,7 @@ def is_dtype(cls, dtype: object) -> bool:
         return super().is_dtype(dtype)
 
     def __from_arrow__(
-        self, array: Union[pyarrow.Array, pyarrow.ChunkedArray]
+        self, array: pyarrow.Array | pyarrow.ChunkedArray
     ) -> IntervalArray:
         """
         Construct IntervalArray from pyarrow Array/ChunkedArray.
@@ -1220,7 +1220,7 @@ def __from_arrow__(
 
         return IntervalArray._concat_same_type(results)
 
-    def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
         # NB: this doesn't handle checking for closed match
         if not all(isinstance(x, IntervalDtype) for x in dtypes):
             return None
@@ -1258,7 +1258,7 @@ class PandasDtype(ExtensionDtype):
 
     _metadata = ("_dtype",)
 
-    def __init__(self, dtype: Optional[Union[NpDtype, PandasDtype]]):
+    def __init__(self, dtype: NpDtype | PandasDtype | None):
         if isinstance(dtype, PandasDtype):
             # make constructor univalent
             dtype = dtype.numpy_dtype
@@ -1282,7 +1282,7 @@ def name(self) -> str:
         return self._dtype.name
 
     @property
-    def type(self) -> Type[np.generic]:
+    def type(self) -> type[np.generic]:
         """
         The type object used to instantiate a scalar of this NumPy data-type.
         """
@@ -1310,7 +1310,7 @@ def construct_from_string(cls, string: str) -> PandasDtype:
         return cls(dtype)
 
     @classmethod
-    def construct_array_type(cls) -> Type[PandasArray]:
+    def construct_array_type(cls) -> type[PandasArray]:
         """
         Return the array type associated with this dtype.
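The pattern applied throughout these hunks is uniform: `Optional[X]` becomes `X | None` (PEP 604), `Union[A, B]` becomes `A | B`, and the `typing.List`/`Dict`/`Tuple`/`Type`/`Set`/`FrozenSet` aliases become the built-in generics of PEP 585. On the Python versions pandas supported at the time (3.7+), these forms are only safe inside annotations of modules that enable postponed annotation evaluation via `from __future__ import annotations` (PEP 563). A minimal standalone sketch of the idiom follows; it is illustrative only, not part of the patch, and `first_n` and its names are hypothetical:

```python
# Hypothetical module showing the annotation style this patch adopts.
from __future__ import annotations  # PEP 563: annotations are stored as strings, never evaluated

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Needed only by type checkers; not imported at runtime.
    from collections.abc import Sequence


def first_n(values: Sequence[int] | None, n: int = 3) -> list[int] | None:
    """Return the first ``n`` items, or None when no input is given."""
    # Without the __future__ import, evaluating `Sequence[int] | None` raises
    # TypeError at runtime on Python < 3.10, and `list[int]` does on < 3.9.
    if values is None:
        return None
    return list(values[:n])


print(first_n([1, 2, 3, 4]))  # [1, 2, 3]
print(first_n(None))          # None
```

Because the annotations are never evaluated at runtime under PEP 563, a rewrite like this is behaviour-preserving; only code that eagerly resolves `__annotations__` on older interpreters would observe a difference.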
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 82e984d36b6a1..87b5f62c64fad 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -492,12 +492,12 @@ class DataFrame(NDFrame, OpsMixin):
     _HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray)
 
     @property
-    def _constructor(self) -> Type[DataFrame]:
+    def _constructor(self) -> type[DataFrame]:
         return DataFrame
 
-    _constructor_sliced: Type[Series] = Series
-    _hidden_attrs: FrozenSet[str] = NDFrame._hidden_attrs | frozenset([])
-    _accessors: Set[str] = {"sparse"}
+    _constructor_sliced: type[Series] = Series
+    _hidden_attrs: frozenset[str] = NDFrame._hidden_attrs | frozenset([])
+    _accessors: set[str] = {"sparse"}
 
     @property
     def _constructor_expanddim(self):
@@ -514,9 +514,9 @@ def constructor(*args, **kwargs):
     def __init__(
         self,
         data=None,
-        index: Optional[Axes] = None,
-        columns: Optional[Axes] = None,
-        dtype: Optional[Dtype] = None,
+        index: Axes | None = None,
+        columns: Axes | None = None,
+        dtype: Dtype | None = None,
         copy: bool = False,
     ):
         if data is None:
@@ -633,7 +633,7 @@ def _as_manager(self, typ: str) -> DataFrame:
     # ----------------------------------------------------------------------
 
     @property
-    def axes(self) -> List[Index]:
+    def axes(self) -> list[Index]:
         """
         Return a list representing the axes of the DataFrame.
 
@@ -650,7 +650,7 @@ def axes(self) -> List[Index]:
         return [self.index, self.columns]
 
     @property
-    def shape(self) -> Tuple[int, int]:
+    def shape(self) -> tuple[int, int]:
         """
         Return a tuple representing the dimensionality of the DataFrame.
 
@@ -823,7 +823,7 @@ def __repr__(self) -> str:
 
         return buf.getvalue()
 
-    def _repr_html_(self) -> Optional[str]:
+    def _repr_html_(self) -> str | None:
         """
         Return a html representation for a particular DataFrame.
 
@@ -878,26 +878,26 @@ def _repr_html_(self) -> Optional[str]:
     @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
     def to_string(
         self,
-        buf: Optional[FilePathOrBuffer[str]] = None,
-        columns: Optional[Sequence[str]] = None,
-        col_space: Optional[int] = None,
-        header: Union[bool, Sequence[str]] = True,
+        buf: FilePathOrBuffer[str] | None = None,
+        columns: Sequence[str] | None = None,
+        col_space: int | None = None,
+        header: bool | Sequence[str] = True,
         index: bool = True,
         na_rep: str = "NaN",
-        formatters: Optional[fmt.FormattersType] = None,
-        float_format: Optional[fmt.FloatFormatType] = None,
-        sparsify: Optional[bool] = None,
+        formatters: fmt.FormattersType | None = None,
+        float_format: fmt.FloatFormatType | None = None,
+        sparsify: bool | None = None,
         index_names: bool = True,
-        justify: Optional[str] = None,
-        max_rows: Optional[int] = None,
-        min_rows: Optional[int] = None,
-        max_cols: Optional[int] = None,
+        justify: str | None = None,
+        max_rows: int | None = None,
+        min_rows: int | None = None,
+        max_cols: int | None = None,
         show_dimensions: bool = False,
         decimal: str = ".",
-        line_width: Optional[int] = None,
-        max_colwidth: Optional[int] = None,
-        encoding: Optional[str] = None,
-    ) -> Optional[str]:
+        line_width: int | None = None,
+        max_colwidth: int | None = None,
+        encoding: str | None = None,
+    ) -> str | None:
         """
         Render a DataFrame to a console-friendly tabular output.
        %(shared_params)s
@@ -1022,7 +1022,7 @@ def style(self) -> Styler:
         """
 
     @Appender(_shared_docs["items"])
-    def items(self) -> Iterable[Tuple[Hashable, Series]]:
+    def items(self) -> Iterable[tuple[Hashable, Series]]:
         if self.columns.is_unique and hasattr(self, "_item_cache"):
             for k in self.columns:
                 yield k, self._get_item_cache(k)
@@ -1031,10 +1031,10 @@ def items(self) -> Iterable[Tuple[Hashable, Series]]:
                 yield k, self._ixs(i, axis=1)
 
     @Appender(_shared_docs["items"])
-    def iteritems(self) -> Iterable[Tuple[Hashable, Series]]:
+    def iteritems(self) -> Iterable[tuple[Hashable, Series]]:
         yield from self.items()
 
-    def iterrows(self) -> Iterable[Tuple[Hashable, Series]]:
+    def iterrows(self) -> Iterable[tuple[Hashable, Series]]:
         """
         Iterate over DataFrame rows as (index, Series) pairs.
 
@@ -1082,7 +1082,7 @@ def iterrows(self) -> Iterable[Tuple[Hashable, Series]]:
             s = klass(v, index=columns, name=k)
             yield k, s
 
-    def itertuples(self, index: bool = True, name: Optional[str] = "Pandas"):
+    def itertuples(self, index: bool = True, name: str | None = "Pandas"):
         """
         Iterate over DataFrame rows as namedtuples.
 
@@ -1179,10 +1179,10 @@ def dot(self, other: Series) -> Series:  # type: ignore[misc]
         ...
 
     @overload
-    def dot(self, other: Union[DataFrame, Index, ArrayLike]) -> DataFrame:
+    def dot(self, other: DataFrame | Index | ArrayLike) -> DataFrame:
         ...
 
-    def dot(self, other: Union[AnyArrayLike, FrameOrSeriesUnion]) -> FrameOrSeriesUnion:
+    def dot(self, other: AnyArrayLike | FrameOrSeriesUnion) -> FrameOrSeriesUnion:
         """
         Compute the matrix multiplication between the DataFrame and other.
 
@@ -1298,12 +1298,12 @@ def __matmul__(self, other: Series) -> Series:
 
     @overload
     def __matmul__(
-        self, other: Union[AnyArrayLike, FrameOrSeriesUnion]
+        self, other: AnyArrayLike | FrameOrSeriesUnion
     ) -> FrameOrSeriesUnion:
         ...
 
     def __matmul__(
-        self, other: Union[AnyArrayLike, FrameOrSeriesUnion]
+        self, other: AnyArrayLike | FrameOrSeriesUnion
     ) -> FrameOrSeriesUnion:
         """
         Matrix multiplication using binary `@` operator in Python>=3.5.
@@ -1328,7 +1328,7 @@ def __rmatmul__(self, other):
 
     @classmethod
     def from_dict(
-        cls, data, orient="columns", dtype: Optional[Dtype] = None, columns=None
+        cls, data, orient="columns", dtype: Dtype | None = None, columns=None
     ) -> DataFrame:
         """
         Construct DataFrame from dict of array-like or dicts.
@@ -1409,7 +1409,7 @@ def from_dict(
 
     def to_numpy(
         self,
-        dtype: Optional[NpDtype] = None,
+        dtype: NpDtype | None = None,
         copy: bool = False,
         na_value=lib.no_default,
     ) -> np.ndarray:
@@ -1654,13 +1654,13 @@ def to_dict(self, orient: str = "dict", into=dict):
     def to_gbq(
         self,
         destination_table: str,
-        project_id: Optional[str] = None,
-        chunksize: Optional[int] = None,
+        project_id: str | None = None,
+        chunksize: int | None = None,
         reauth: bool = False,
         if_exists: str = "fail",
         auth_local_webserver: bool = False,
-        table_schema: Optional[List[Dict[str, str]]] = None,
-        location: Optional[str] = None,
+        table_schema: list[dict[str, str]] | None = None,
+        location: str | None = None,
         progress_bar: bool = True,
         credentials=None,
     ) -> None:
@@ -2111,7 +2111,7 @@ def _from_arrays(
         arrays,
         columns,
         index,
-        dtype: Optional[Dtype] = None,
+        dtype: Dtype | None = None,
         verify_integrity: bool = True,
     ) -> DataFrame:
         """
@@ -2156,14 +2156,14 @@ def _from_arrays(
     def to_stata(
         self,
         path: FilePathOrBuffer,
-        convert_dates: Optional[Dict[Hashable, str]] = None,
+        convert_dates: dict[Hashable, str] | None = None,
         write_index: bool = True,
-        byteorder: Optional[str] = None,
-        time_stamp: Optional[datetime.datetime] = None,
-        data_label: Optional[str] = None,
-        variable_labels: Optional[Dict[Hashable, str]] = None,
-        version: Optional[int] = 114,
-        convert_strl: Optional[Sequence[Hashable]] = None,
+        byteorder: str | None = None,
+        time_stamp: datetime.datetime | None = None,
+        data_label: str | None = None,
+        variable_labels: dict[Hashable, str] | None = None,
+        version: int | None = 114,
+        convert_strl: Sequence[Hashable] | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
     ) -> None:
@@ -2288,7 +2288,7 @@ def to_stata(
                 StataWriterUTF8 as statawriter,
             )
 
-        kwargs: Dict[str, Any] = {}
+        kwargs: dict[str, Any] = {}
         if version is None or version >= 117:
             # strl conversion is only supported >= 117
             kwargs["convert_strl"] = convert_strl
@@ -2361,12 +2361,12 @@ def to_feather(self, path: FilePathOrBuffer[AnyStr], **kwargs) -> None:
     )
     def to_markdown(
         self,
-        buf: Optional[Union[IO[str], str]] = None,
+        buf: IO[str] | str | None = None,
         mode: str = "wt",
         index: bool = True,
         storage_options: StorageOptions = None,
         **kwargs,
-    ) -> Optional[str]:
+    ) -> str | None:
         if "showindex" in kwargs:
             warnings.warn(
                 "'showindex' is deprecated. Only 'index' will be used "
@@ -2392,14 +2392,14 @@ def to_markdown(
     @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_parquet(
         self,
-        path: Optional[FilePathOrBuffer] = None,
+        path: FilePathOrBuffer | None = None,
         engine: str = "auto",
-        compression: Optional[str] = "snappy",
-        index: Optional[bool] = None,
-        partition_cols: Optional[List[str]] = None,
+        compression: str | None = "snappy",
+        index: bool | None = None,
+        partition_cols: list[str] | None = None,
         storage_options: StorageOptions = None,
         **kwargs,
-    ) -> Optional[bytes]:
+    ) -> bytes | None:
         """
         Write a DataFrame to the binary parquet format.
@@ -2517,29 +2517,29 @@ def to_parquet(
     @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
     def to_html(
         self,
-        buf: Optional[FilePathOrBuffer[str]] = None,
-        columns: Optional[Sequence[str]] = None,
-        col_space: Optional[ColspaceArgType] = None,
-        header: Union[bool, Sequence[str]] = True,
+        buf: FilePathOrBuffer[str] | None = None,
+        columns: Sequence[str] | None = None,
+        col_space: ColspaceArgType | None = None,
+        header: bool | Sequence[str] = True,
         index: bool = True,
         na_rep: str = "NaN",
-        formatters: Optional[FormattersType] = None,
-        float_format: Optional[FloatFormatType] = None,
-        sparsify: Optional[bool] = None,
+        formatters: FormattersType | None = None,
+        float_format: FloatFormatType | None = None,
+        sparsify: bool | None = None,
         index_names: bool = True,
-        justify: Optional[str] = None,
-        max_rows: Optional[int] = None,
-        max_cols: Optional[int] = None,
-        show_dimensions: Union[bool, str] = False,
+        justify: str | None = None,
+        max_rows: int | None = None,
+        max_cols: int | None = None,
+        show_dimensions: bool | str = False,
         decimal: str = ".",
         bold_rows: bool = True,
-        classes: Optional[Union[str, List, Tuple]] = None,
+        classes: str | list | tuple | None = None,
         escape: bool = True,
         notebook: bool = False,
-        border: Optional[int] = None,
-        table_id: Optional[str] = None,
+        border: int | None = None,
+        table_id: str | None = None,
         render_links: bool = False,
-        encoding: Optional[str] = None,
+        encoding: str | None = None,
     ):
         """
         Render a DataFrame as an HTML table.
@@ -2723,12 +2723,12 @@ def to_html(
     @doc(BaseInfo.render)
     def info(
         self,
-        verbose: Optional[bool] = None,
-        buf: Optional[IO[str]] = None,
-        max_cols: Optional[int] = None,
-        memory_usage: Optional[Union[bool, str]] = None,
-        show_counts: Optional[bool] = None,
-        null_counts: Optional[bool] = None,
+        verbose: bool | None = None,
+        buf: IO[str] | None = None,
+        max_cols: int | None = None,
+        memory_usage: bool | str | None = None,
+        show_counts: bool | None = None,
+        null_counts: bool | None = None,
     ) -> None:
         if null_counts is not None:
             if show_counts is not None:
@@ -3800,8 +3800,8 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
         keep_these = np.full(self.shape[1], True)
 
         def extract_unique_dtypes_from_dtypes_set(
-            dtypes_set: FrozenSet[Dtype], unique_dtypes: np.ndarray
-        ) -> List[Dtype]:
+            dtypes_set: frozenset[Dtype], unique_dtypes: np.ndarray
+        ) -> list[Dtype]:
             extracted_dtypes = [
                 unique_dtype
                 for unique_dtype in unique_dtypes
@@ -4152,14 +4152,14 @@ def align(
         self,
         other,
         join: str = "outer",
-        axis: Optional[Axis] = None,
-        level: Optional[Level] = None,
+        axis: Axis | None = None,
+        level: Level | None = None,
         copy: bool = True,
         fill_value=None,
-        method: Optional[str] = None,
+        method: str | None = None,
         limit=None,
         fill_axis: Axis = 0,
-        broadcast_axis: Optional[Axis] = None,
+        broadcast_axis: Axis | None = None,
     ) -> DataFrame:
         return super().align(
             other,
@@ -4243,7 +4243,7 @@ def drop(
         axis: Axis = 0,
         index=None,
         columns=None,
-        level: Optional[Level] = None,
+        level: Level | None = None,
         inplace: bool = False,
         errors: str = "raise",
     ):
@@ -4383,16 +4383,16 @@ def drop(
     )
     def rename(
         self,
-        mapper: Optional[Renamer] = None,
+        mapper: Renamer | None = None,
         *,
-        index: Optional[Renamer] = None,
-        columns: Optional[Renamer] = None,
-        axis: Optional[Axis] = None,
+        index: Renamer | None = None,
+        columns: Renamer | None = None,
+        axis: Axis | None = None,
         copy: bool = True,
         inplace: bool = False,
-        level: Optional[Level] = None,
+        level: Level | None = None,
         errors: str = "ignore",
-    ) -> Optional[DataFrame]:
+    ) -> DataFrame | None:
         """
         Alter axes labels.
 
@@ -4515,12 +4515,12 @@ def rename(
     def fillna(
         self,
         value=None,
-        method: Optional[str] = None,
-        axis: Optional[Axis] = None,
+        method: str | None = None,
+        axis: Axis | None = None,
         inplace: bool = False,
         limit=None,
         downcast=None,
-    ) -> Optional[DataFrame]:
+    ) -> DataFrame | None:
         return super().fillna(
             value=value,
             method=method,
@@ -4593,7 +4593,7 @@ def replace(
         )
 
     def _replace_columnwise(
-        self, mapping: Dict[Hashable, Tuple[Any, Any]], inplace: bool, regex
+        self, mapping: dict[Hashable, tuple[Any, Any]], inplace: bool, regex
     ):
         """
         Dispatch to Series.replace column-wise.
@@ -4771,7 +4771,7 @@ def set_index(
                         "one-dimensional arrays."
                     )
 
-        missing: List[Hashable] = []
+        missing: list[Hashable] = []
         for col in keys:
             if isinstance(col, (Index, Series, np.ndarray, list, abc.Iterator)):
                 # arrays are fine as long as they are one-dimensional
@@ -4799,7 +4799,7 @@ def set_index(
             frame = self.copy()
 
         arrays = []
-        names: List[Hashable] = []
+        names: list[Hashable] = []
         if append:
             names = list(self.index.names)
             if isinstance(self.index, MultiIndex):
@@ -4808,7 +4808,7 @@ def set_index(
             else:
                 arrays.append(self.index)
 
-        to_remove: List[Hashable] = []
+        to_remove: list[Hashable] = []
         for col in keys:
             if isinstance(col, MultiIndex):
                 for n in range(col.nlevels):
@@ -4862,7 +4862,7 @@ def set_index(
     # Overloaded function signatures 1 and 2 overlap with incompatible return types
     def reset_index(  # type: ignore[misc]
         self,
-        level: Optional[Union[Hashable, Sequence[Hashable]]] = ...,
+        level: Hashable | Sequence[Hashable] | None = ...,
         drop: bool = ...,
         inplace: Literal[False] = ...,
         col_level: Hashable = ...,
@@ -4873,7 +4873,7 @@ def reset_index(  # type: ignore[misc]
     @overload
     def reset_index(
         self,
-        level: Optional[Union[Hashable, Sequence[Hashable]]] = ...,
+        level: Hashable | Sequence[Hashable] | None = ...,
         drop: bool = ...,
         inplace: Literal[True] = ...,
         col_level: Hashable = ...,
@@ -4883,12 +4883,12 @@ def reset_index(
 
     def reset_index(
         self,
-        level: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+        level: Hashable | Sequence[Hashable] | None = None,
         drop: bool = False,
         inplace: bool = False,
         col_level: Hashable = 0,
         col_fill: Hashable = "",
-    ) -> Optional[DataFrame]:
+    ) -> DataFrame | None:
         """
         Reset the index, or a level of it.
 
@@ -5046,7 +5046,7 @@ class max type
             new_index = self.index.droplevel(level)
 
         if not drop:
-            to_insert: Iterable[Tuple[Any, Optional[Any]]]
+            to_insert: Iterable[tuple[Any, Any | None]]
             if isinstance(self.index, MultiIndex):
                 names = [
                     (n if n is not None else f"level_{i}")
@@ -5269,11 +5269,11 @@ def dropna(
 
     def drop_duplicates(
         self,
-        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
-        keep: Union[str, bool] = "first",
+        subset: Hashable | Sequence[Hashable] | None = None,
+        keep: str | bool = "first",
         inplace: bool = False,
         ignore_index: bool = False,
-    ) -> Optional[DataFrame]:
+    ) -> DataFrame | None:
         """
         Return DataFrame with duplicate rows removed.
 
@@ -5366,8 +5366,8 @@ def drop_duplicates(
 
     def duplicated(
         self,
-        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
-        keep: Union[str, bool] = "first",
+        subset: Hashable | Sequence[Hashable] | None = None,
+        keep: str | bool = "first",
     ) -> Series:
         """
         Return boolean Series denoting duplicate rows.
@@ -5565,7 +5565,7 @@ def sort_values(  # type: ignore[override]
     def sort_index(
         self,
         axis: Axis = 0,
-        level: Optional[Level] = None,
+        level: Level | None = None,
         ascending: bool = True,
         inplace: bool = False,
         kind: str = "quicksort",
@@ -5677,7 +5677,7 @@ def sort_index(
 
     def value_counts(
         self,
-        subset: Optional[Sequence[Hashable]] = None,
+        subset: Sequence[Hashable] | None = None,
         normalize: bool = False,
         sort: bool = True,
         ascending: bool = False,
@@ -6068,7 +6068,7 @@ def _arith_method(self, other, op):
 
     _logical_method = _arith_method
 
-    def _dispatch_frame_op(self, right, func, axis: Optional[int] = None):
+    def _dispatch_frame_op(self, right, func, axis: int | None = None):
         """
         Evaluate the frame operation func(left, right) by evaluating
         column-by-column, dispatching to the Series implementation.
@@ -6171,13 +6171,13 @@ def _construct_result(self, result) -> DataFrame:
         out.index = self.index
         return out
 
-    def __divmod__(self, other) -> Tuple[DataFrame, DataFrame]:
+    def __divmod__(self, other) -> tuple[DataFrame, DataFrame]:
         # Naive implementation, room for optimization
         div = self // other
         mod = self - div * other
         return div, mod
 
-    def __rdivmod__(self, other) -> Tuple[DataFrame, DataFrame]:
+    def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]:
         # Naive implementation, room for optimization
         div = other // self
         mod = other - div * self
@@ -6791,7 +6791,7 @@ def groupby(
         self,
         by=None,
         axis: Axis = 0,
-        level: Optional[Level] = None,
+        level: Level | None = None,
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
@@ -7315,7 +7315,7 @@ def stack(self, level: Level = -1, dropna: bool = True):
         return result.__finalize__(self, method="stack")
 
     def explode(
-        self, column: Union[str, Tuple], ignore_index: bool = False
+        self, column: str | tuple, ignore_index: bool = False
     ) -> DataFrame:
         """
         Transform each element of a list-like to a row, replicating index values.
@@ -7463,7 +7463,7 @@ def melt(
         value_vars=None,
         var_name=None,
         value_name="value",
-        col_level: Optional[Level] = None,
+        col_level: Level | None = None,
         ignore_index=True,
     ) -> DataFrame:
 
@@ -7574,7 +7574,7 @@ def _gotitem(
         self,
         key: IndexLabel,
         ndim: int,
-        subset: Optional[FrameOrSeriesUnion] = None,
+        subset: FrameOrSeriesUnion | None = None,
     ) -> FrameOrSeriesUnion:
         """
         Sub-classes to define. Return a sliced object.
@@ -7889,7 +7889,7 @@ def apply(
         return op.apply()
 
     def applymap(
-        self, func: PythonFuncType, na_action: Optional[str] = None
+        self, func: PythonFuncType, na_action: str | None = None
     ) -> DataFrame:
         """
         Apply a function to a Dataframe elementwise.
@@ -8117,7 +8117,7 @@ def append(
     def join(
         self,
         other: FrameOrSeriesUnion,
-        on: Optional[IndexLabel] = None,
+        on: IndexLabel | None = None,
         how: str = "left",
         lsuffix: str = "",
         rsuffix: str = "",
@@ -8247,7 +8247,7 @@ def join(
     def _join_compat(
         self,
         other: FrameOrSeriesUnion,
-        on: Optional[IndexLabel] = None,
+        on: IndexLabel | None = None,
         how: str = "left",
         lsuffix: str = "",
         rsuffix: str = "",
@@ -8318,16 +8318,16 @@ def merge(
         self,
         right: FrameOrSeriesUnion,
         how: str = "inner",
-        on: Optional[IndexLabel] = None,
-        left_on: Optional[IndexLabel] = None,
-        right_on: Optional[IndexLabel] = None,
+        on: IndexLabel | None = None,
+        left_on: IndexLabel | None = None,
+        right_on: IndexLabel | None = None,
         left_index: bool = False,
         right_index: bool = False,
         sort: bool = False,
         suffixes: Suffixes = ("_x", "_y"),
         copy: bool = True,
         indicator: bool = False,
-        validate: Optional[str] = None,
+        validate: str | None = None,
     ) -> DataFrame:
         from pandas.core.reshape.merge import merge
 
@@ -8552,7 +8552,7 @@ def corr(self, method="pearson", min_periods=1) -> DataFrame:
         return self._constructor(correl, index=idx, columns=cols)
 
     def cov(
-        self, min_periods: Optional[int] = None, ddof: Optional[int] = 1
+        self, min_periods: int | None = None, ddof: int | None = 1
     ) -> DataFrame:
         """
         Compute pairwise covariance of columns, excluding NA/null values.
@@ -8766,7 +8766,7 @@ def c(x):
     # ndarray-like stats methods
 
     def count(
-        self, axis: Axis = 0, level: Optional[Level] = None, numeric_only: bool = False
+        self, axis: Axis = 0, level: Level | None = None, numeric_only: bool = False
     ):
         """
         Count non-NA cells for each column or row.
@@ -8923,7 +8923,7 @@ def _reduce(
         *,
         axis: Axis = 0,
         skipna: bool = True,
-        numeric_only: Optional[bool] = None,
+        numeric_only: bool | None = None,
         filter_type=None,
         **kwds,
     ):
@@ -9435,7 +9435,7 @@ def asfreq(
         self,
         freq,
         method=None,
-        how: Optional[str] = None,
+        how: str | None = None,
         normalize: bool = False,
         fill_value=None,
     ) -> DataFrame:
@@ -9452,16 +9452,16 @@ def resample(
         self,
         rule,
         axis=0,
-        closed: Optional[str] = None,
-        label: Optional[str] = None,
+        closed: str | None = None,
+        label: str | None = None,
         convention: str = "start",
-        kind: Optional[str] = None,
+        kind: str | None = None,
         loffset=None,
-        base: Optional[int] = None,
+        base: int | None = None,
         on=None,
         level=None,
-        origin: Union[str, TimestampConvertibleTypes] = "start_day",
-        offset: Optional[TimedeltaConvertibleTypes] = None,
+        origin: str | TimestampConvertibleTypes = "start_day",
+        offset: TimedeltaConvertibleTypes | None = None,
     ) -> Resampler:
         return super().resample(
             rule=rule,
@@ -9641,7 +9641,7 @@ def isin(self, values) -> DataFrame:
     # ----------------------------------------------------------------------
     # Add index and columns
     _AXIS_ORDERS = ["index", "columns"]
-    _AXIS_TO_AXIS_NUMBER: Dict[Axis, int] = {
+    _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {
        **NDFrame._AXIS_TO_AXIS_NUMBER,
        1: 1,
        "columns": 1,
@@ -9659,13 +9659,13 @@ def isin(self, values) -> DataFrame:
     )
 
     @property
-    def _AXIS_NUMBERS(self) -> Dict[str, int]:
+    def _AXIS_NUMBERS(self) -> dict[str, int]:
         """.. deprecated:: 1.1.0"""
         super()._AXIS_NUMBERS
         return {"index": 0, "columns": 1}
 
     @property
-    def _AXIS_NAMES(self) -> Dict[int, str]:
+    def _AXIS_NAMES(self) -> dict[int, str]:
         """.. deprecated:: 1.1.0"""
deprecated:: 1.1.0""" super()._AXIS_NAMES return {0: "index", 1: "columns"} diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 96b35f1aaab9c..6725d5b451f6c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -158,7 +158,7 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): copy : bool, default False """ - _internal_names: List[str] = [ + _internal_names: list[str] = [ "_mgr", "_cacher", "_item_cache", @@ -174,15 +174,15 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): "__array_interface__", "_flags", ] - _internal_names_set: Set[str] = set(_internal_names) - _accessors: Set[str] = set() - _hidden_attrs: FrozenSet[str] = frozenset( + _internal_names_set: set[str] = set(_internal_names) + _accessors: set[str] = set() + _hidden_attrs: frozenset[str] = frozenset( ["_AXIS_NAMES", "_AXIS_NUMBERS", "get_values", "tshift"] ) - _metadata: List[str] = [] + _metadata: list[str] = [] _is_copy = None _mgr: Manager - _attrs: Dict[Optional[Hashable], Any] + _attrs: dict[Hashable | None, Any] _typ: str # ---------------------------------------------------------------------- @@ -192,7 +192,7 @@ def __init__( self, data: Manager, copy: bool = False, - attrs: Optional[Mapping[Optional[Hashable], Any]] = None, + attrs: Mapping[Hashable | None, Any] | None = None, ): # copy kwarg is retained for mypy compat, is not used @@ -208,7 +208,7 @@ def __init__( @classmethod def _init_mgr( - cls, mgr, axes, dtype: Optional[Dtype] = None, copy: bool = False + cls, mgr, axes, dtype: Dtype | None = None, copy: bool = False ) -> Manager: """ passed a manager and a axes dict """ for a, axe in axes.items(): @@ -236,7 +236,7 @@ def _init_mgr( # attrs and flags @property - def attrs(self) -> Dict[Optional[Hashable], Any]: + def attrs(self) -> dict[Hashable | None, Any]: """ Dictionary of global attributes of this dataset. @@ -253,7 +253,7 @@ def attrs(self) -> Dict[Optional[Hashable], Any]: return self._attrs @attrs.setter - def attrs(self, value: Mapping[Optional[Hashable], Any]) -> None: + def attrs(self, value: Mapping[Hashable | None, Any]) -> None: self._attrs = dict(value) @final @@ -302,7 +302,7 @@ def set_flags( self: FrameOrSeries, *, copy: bool = False, - allows_duplicate_labels: Optional[bool] = None, + allows_duplicate_labels: bool | None = None, ) -> FrameOrSeries: """ Return a new object with updated flags. @@ -368,7 +368,7 @@ def _validate_dtype(cls, dtype): # Construction @property - def _constructor(self: FrameOrSeries) -> Type[FrameOrSeries]: + def _constructor(self: FrameOrSeries) -> type[FrameOrSeries]: """ Used when a manipulation result has the same dimensions as the original. @@ -406,21 +406,21 @@ def _data(self): _stat_axis_number = 0 _stat_axis_name = "index" _ix = None - _AXIS_ORDERS: List[str] - _AXIS_TO_AXIS_NUMBER: Dict[Axis, int] = {0: 0, "index": 0, "rows": 0} + _AXIS_ORDERS: list[str] + _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {0: 0, "index": 0, "rows": 0} _AXIS_REVERSED: bool _info_axis_number: int _info_axis_name: str _AXIS_LEN: int @property - def _AXIS_NUMBERS(self) -> Dict[str, int]: + def _AXIS_NUMBERS(self) -> dict[str, int]: """.. deprecated:: 1.1.0""" warnings.warn("_AXIS_NUMBERS has been deprecated.", FutureWarning, stacklevel=3) return {"index": 0} @property - def _AXIS_NAMES(self) -> Dict[int, str]: + def _AXIS_NAMES(self) -> dict[int, str]: """.. 
deprecated:: 1.1.0""" warnings.warn("_AXIS_NAMES has been deprecated.", FutureWarning, stacklevel=3) return {0: "index"} @@ -495,7 +495,7 @@ def _get_block_manager_axis(cls, axis: Axis) -> int: return axis @final - def _get_axis_resolvers(self, axis: str) -> Dict[str, Union[Series, MultiIndex]]: + def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]: # index or columns axis_index = getattr(self, axis) d = {} @@ -526,17 +526,17 @@ def _get_axis_resolvers(self, axis: str) -> Dict[str, Union[Series, MultiIndex]] return d @final - def _get_index_resolvers(self) -> Dict[Hashable, Union[Series, MultiIndex]]: + def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]: from pandas.core.computation.parsing import clean_column_name - d: Dict[str, Union[Series, MultiIndex]] = {} + d: dict[str, Series | MultiIndex] = {} for axis_name in self._AXIS_ORDERS: d.update(self._get_axis_resolvers(axis_name)) return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)} @final - def _get_cleaned_column_resolvers(self) -> Dict[Hashable, Series]: + def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: """ Return the special character free column resolvers of a dataframe. @@ -562,14 +562,14 @@ def _stat_axis(self) -> Index: return getattr(self, self._stat_axis_name) @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> tuple[int, ...]: """ Return a tuple of axis dimensions """ return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) @property - def axes(self) -> List[Index]: + def axes(self) -> list[Index]: """ Return index label(s) of the internal NDFrame """ @@ -775,7 +775,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: new_labels = labels.droplevel(level) return self.set_axis(new_labels, axis=axis, inplace=False) - def pop(self, item: Hashable) -> Union[Series, Any]: + def pop(self, item: Hashable) -> Series | Any: result = self[item] del self[item] if self.ndim == 2: @@ -900,16 +900,16 @@ def squeeze(self, axis=None): def rename( self: FrameOrSeries, - mapper: Optional[Renamer] = None, + mapper: Renamer | None = None, *, - index: Optional[Renamer] = None, - columns: Optional[Renamer] = None, - axis: Optional[Axis] = None, + index: Renamer | None = None, + columns: Renamer | None = None, + axis: Axis | None = None, copy: bool = True, inplace: bool = False, - level: Optional[Level] = None, + level: Level | None = None, errors: str = "ignore", - ) -> Optional[FrameOrSeries]: + ) -> FrameOrSeries | None: """ Alter axes input function or functions. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left @@ -1904,13 +1904,13 @@ def empty(self) -> bool_t: # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented __array_priority__ = 1000 - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: return np.asarray(self._values, dtype=dtype) def __array_wrap__( self, result: np.ndarray, - context: Optional[Tuple[Callable, Tuple[Any, ...], int]] = None, + context: tuple[Callable, tuple[Any, ...], int] | None = None, ): """ Gets called after a ufunc and other functions. 
@@ -1956,7 +1956,7 @@ def __array_ufunc__(
     # Picklability
 
     @final
-    def __getstate__(self) -> Dict[str, Any]:
+    def __getstate__(self) -> dict[str, Any]:
         meta = {k: getattr(self, k, None) for k in self._metadata}
         return {
             "_mgr": self._mgr,
@@ -2046,7 +2046,7 @@ def to_excel(
         excel_writer,
         sheet_name: str = "Sheet1",
         na_rep: str = "",
-        float_format: Optional[str] = None,
+        float_format: str | None = None,
         columns=None,
         header=True,
         index=True,
@@ -2208,19 +2208,19 @@ def to_excel(
     @doc(storage_options=_shared_docs["storage_options"])
     def to_json(
         self,
-        path_or_buf: Optional[FilePathOrBuffer] = None,
-        orient: Optional[str] = None,
-        date_format: Optional[str] = None,
+        path_or_buf: FilePathOrBuffer | None = None,
+        orient: str | None = None,
+        date_format: str | None = None,
         double_precision: int = 10,
         force_ascii: bool_t = True,
         date_unit: str = "ms",
-        default_handler: Optional[Callable[[Any], JSONSerializable]] = None,
+        default_handler: Callable[[Any], JSONSerializable] | None = None,
         lines: bool_t = False,
         compression: CompressionOptions = "infer",
         index: bool_t = True,
-        indent: Optional[int] = None,
+        indent: int | None = None,
         storage_options: StorageOptions = None,
-    ) -> Optional[str]:
+    ) -> str | None:
         """
         Convert the object to a JSON string.
 
@@ -2493,15 +2493,15 @@ def to_hdf(
         path_or_buf,
         key: str,
         mode: str = "a",
-        complevel: Optional[int] = None,
-        complib: Optional[str] = None,
+        complevel: int | None = None,
+        complib: str | None = None,
         append: bool_t = False,
-        format: Optional[str] = None,
+        format: str | None = None,
         index: bool_t = True,
-        min_itemsize: Optional[Union[int, Dict[str, int]]] = None,
+        min_itemsize: int | dict[str, int] | None = None,
         nan_rep=None,
-        dropna: Optional[bool_t] = None,
-        data_columns: Optional[Union[bool_t, List[str]]] = None,
+        dropna: bool_t | None = None,
+        data_columns: bool_t | list[str] | None = None,
         errors: str = "strict",
         encoding: str = "UTF-8",
     ) -> None:
@@ -2644,7 +2644,7 @@ def to_sql(
         index: bool_t = True,
         index_label=None,
         chunksize=None,
-        dtype: Optional[DtypeArg] = None,
+        dtype: DtypeArg | None = None,
         method=None,
     ) -> None:
         """
@@ -2884,7 +2884,7 @@ def to_pickle(
 
     @final
     def to_clipboard(
-        self, excel: bool_t = True, sep: Optional[str] = None, **kwargs
+        self, excel: bool_t = True, sep: str | None = None, **kwargs
     ) -> None:
         r"""
         Copy object to the system clipboard.
@@ -3218,28 +3218,28 @@ def to_latex(
     @doc(storage_options=_shared_docs["storage_options"])
     def to_csv(
         self,
-        path_or_buf: Optional[FilePathOrBuffer] = None,
+        path_or_buf: FilePathOrBuffer | None = None,
         sep: str = ",",
         na_rep: str = "",
-        float_format: Optional[str] = None,
-        columns: Optional[Sequence[Hashable]] = None,
-        header: Union[bool_t, List[str]] = True,
+        float_format: str | None = None,
+        columns: Sequence[Hashable] | None = None,
+        header: bool_t | list[str] = True,
         index: bool_t = True,
-        index_label: Optional[IndexLabel] = None,
+        index_label: IndexLabel | None = None,
         mode: str = "w",
-        encoding: Optional[str] = None,
+        encoding: str | None = None,
         compression: CompressionOptions = "infer",
-        quoting: Optional[int] = None,
+        quoting: int | None = None,
         quotechar: str = '"',
-        line_terminator: Optional[str] = None,
-        chunksize: Optional[int] = None,
-        date_format: Optional[str] = None,
+        line_terminator: str | None = None,
+        chunksize: int | None = None,
+        date_format: str | None = None,
         doublequote: bool_t = True,
-        escapechar: Optional[str] = None,
+        escapechar: str | None = None,
         decimal: str = ".",
         errors: str = "strict",
         storage_options: StorageOptions = None,
-    ) -> Optional[str]:
+    ) -> str | None:
         r"""
         Write object to a comma-separated values (csv) file.
 
@@ -3503,7 +3503,7 @@ def _clear_item_cache(self) -> None:
     # Indexing Methods
 
     def take(
-        self: FrameOrSeries, indices, axis=0, is_copy: Optional[bool_t] = None, **kwargs
+        self: FrameOrSeries, indices, axis=0, is_copy: bool_t | None = None, **kwargs
     ) -> FrameOrSeries:
         """
         Return the elements in the given *positional* indices along an axis.
@@ -4012,7 +4012,7 @@ def _is_view(self) -> bool_t:
     def reindex_like(
         self: FrameOrSeries,
         other,
-        method: Optional[str] = None,
+        method: str | None = None,
         copy: bool_t = True,
         limit=None,
         tolerance=None,
@@ -4894,8 +4894,8 @@ def _reindex_with_indexers(
     def filter(
         self: FrameOrSeries,
         items=None,
-        like: Optional[str] = None,
-        regex: Optional[str] = None,
+        like: str | None = None,
+        regex: str | None = None,
         axis=None,
     ) -> FrameOrSeries:
         """
@@ -5359,7 +5359,7 @@ def sample(
     @doc(klass=_shared_doc_kwargs["klass"])
     def pipe(
         self,
-        func: Union[Callable[..., T], Tuple[Callable[..., T], str]],
+        func: Callable[..., T] | tuple[Callable[..., T], str],
         *args,
         **kwargs,
     ) -> T:
@@ -5420,7 +5420,7 @@ def pipe(
 
     @final
     def __finalize__(
-        self: FrameOrSeries, other, method: Optional[str] = None, **kwargs
+        self: FrameOrSeries, other, method: str | None = None, **kwargs
     ) -> FrameOrSeries:
         """
         Propagate metadata from other to self.
@@ -5514,7 +5514,7 @@ def __setattr__(self, name: str, value) -> None:
             object.__setattr__(self, name, value)
 
     @final
-    def _dir_additions(self) -> Set[str]:
+    def _dir_additions(self) -> set[str]:
         """
         add the string-like attributes from the info_axis.
         If info_axis is a MultiIndex, its first level values are used.
@@ -6273,7 +6273,7 @@ def fillna(
         inplace: bool_t = False,
         limit=None,
         downcast=None,
-    ) -> Optional[FrameOrSeries]:
+    ) -> FrameOrSeries | None:
         """
         Fill NA/NaN values using the specified method.
 
@@ -6470,7 +6470,7 @@ def ffill(
         inplace: bool_t = False,
         limit=None,
         downcast=None,
-    ) -> Optional[FrameOrSeries]:
+    ) -> FrameOrSeries | None:
         """
         Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
 
@@ -6492,7 +6492,7 @@ def bfill(
         inplace: bool_t = False,
         limit=None,
         downcast=None,
-    ) -> Optional[FrameOrSeries]:
+    ) -> FrameOrSeries | None:
         """
         Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
@@ -6518,7 +6518,7 @@ def replace( to_replace=None, value=None, inplace: bool_t = False, - limit: Optional[int] = None, + limit: int | None = None, regex=False, method="pad", ): @@ -6695,13 +6695,13 @@ def interpolate( self: FrameOrSeries, method: str = "linear", axis: Axis = 0, - limit: Optional[int] = None, + limit: int | None = None, inplace: bool_t = False, - limit_direction: Optional[str] = None, - limit_area: Optional[str] = None, - downcast: Optional[str] = None, + limit_direction: str | None = None, + limit_area: str | None = None, + downcast: str | None = None, **kwargs, - ) -> Optional[FrameOrSeries]: + ) -> FrameOrSeries | None: """ Fill NaN values using an interpolation method. @@ -7481,7 +7481,7 @@ def asfreq( self: FrameOrSeries, freq, method=None, - how: Optional[str] = None, + how: str | None = None, normalize: bool_t = False, fill_value=None, ) -> FrameOrSeries: @@ -7750,16 +7750,16 @@ def resample( self, rule, axis=0, - closed: Optional[str] = None, - label: Optional[str] = None, + closed: str | None = None, + label: str | None = None, convention: str = "start", - kind: Optional[str] = None, + kind: str | None = None, loffset=None, - base: Optional[int] = None, + base: int | None = None, on=None, level=None, - origin: Union[str, TimestampConvertibleTypes] = "start_day", - offset: Optional[TimedeltaConvertibleTypes] = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, ) -> Resampler: """ Resample time-series data. @@ -8318,7 +8318,7 @@ def rank( self: FrameOrSeries, axis=0, method: str = "average", - numeric_only: Optional[bool_t] = None, + numeric_only: bool_t | None = None, na_option: str = "keep", ascending: bool_t = True, pct: bool_t = False, @@ -10920,13 +10920,13 @@ def min(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): @doc(Rolling) def rolling( self, - window: Union[int, timedelta, BaseOffset, BaseIndexer], - min_periods: Optional[int] = None, + window: int | timedelta | BaseOffset | BaseIndexer, + min_periods: int | None = None, center: bool_t = False, - win_type: Optional[str] = None, - on: Optional[str] = None, + win_type: str | None = None, + on: str | None = None, axis: Axis = 0, - closed: Optional[str] = None, + closed: str | None = None, method: str = "single", ): axis = self._get_axis_number(axis) @@ -10961,7 +10961,7 @@ def rolling( def expanding( self, min_periods: int = 1, - center: Optional[bool_t] = None, + center: bool_t | None = None, axis: Axis = 0, method: str = "single", ) -> Expanding: @@ -10983,15 +10983,15 @@ def expanding( @doc(ExponentialMovingWindow) def ewm( self, - com: Optional[float] = None, - span: Optional[float] = None, - halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None, - alpha: Optional[float] = None, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, min_periods: int = 0, adjust: bool_t = True, ignore_na: bool_t = False, axis: Axis = 0, - times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, + times: str | np.ndarray | FrameOrSeries | None = None, ) -> ExponentialMovingWindow: axis = self._get_axis_number(axis) return ExponentialMovingWindow( diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a98ef15696339..e92086122e1d3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -98,7 +98,7 @@ ScalarResult = TypeVar("ScalarResult") -def generate_property(name: str, 
klass: Type[FrameOrSeries]): +def generate_property(name: str, klass: type[FrameOrSeries]): """ Create a property for a GroupBy subclass to dispatch to DataFrame/Series. @@ -121,7 +121,7 @@ def prop(self): return property(prop) -def pin_allowlisted_properties(klass: Type[FrameOrSeries], allowlist: FrozenSet[str]): +def pin_allowlisted_properties(klass: type[FrameOrSeries], allowlist: frozenset[str]): """ Create GroupBy member defs for DataFrame/Series names in a allowlist. @@ -306,7 +306,7 @@ def _aggregate_multiple_funcs(self, arg): arg = zip(columns, arg) - results: Dict[base.OutputKey, FrameOrSeriesUnion] = {} + results: dict[base.OutputKey, FrameOrSeriesUnion] = {} for idx, (name, func) in enumerate(arg): obj = self @@ -328,8 +328,8 @@ def _aggregate_multiple_funcs(self, arg): # TODO: index should not be Optional - see GH 35490 def _wrap_series_output( self, - output: Mapping[base.OutputKey, Union[Series, np.ndarray]], - index: Optional[Index], + output: Mapping[base.OutputKey, Series | np.ndarray], + index: Index | None, ) -> FrameOrSeriesUnion: """ Wraps the output of a SeriesGroupBy operation into the expected result. @@ -369,8 +369,8 @@ def _wrap_series_output( # TODO: Remove index argument, use self.grouper.result_index, see GH 35490 def _wrap_aggregated_output( self, - output: Mapping[base.OutputKey, Union[Series, np.ndarray]], - index: Optional[Index], + output: Mapping[base.OutputKey, Series | np.ndarray], + index: Index | None, ) -> FrameOrSeriesUnion: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. @@ -393,7 +393,7 @@ def _wrap_aggregated_output( return self._reindex_output(result) def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]] + self, output: Mapping[base.OutputKey, Series | np.ndarray] ) -> Series: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. @@ -420,7 +420,7 @@ def _wrap_transformed_output( return result def _wrap_applied_output( - self, keys: Index, values: Optional[List[Any]], not_indexed_same: bool = False + self, keys: Index, values: list[Any] | None, not_indexed_same: bool = False ) -> FrameOrSeriesUnion: """ Wrap the output of SeriesGroupBy.apply into the expected result. @@ -1140,7 +1140,7 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: axis = self.axis obj = self._obj_with_exclusions - result: Dict[Hashable, Union[NDFrame, np.ndarray]] = {} + result: dict[Hashable, NDFrame | np.ndarray] = {} if axis != obj._info_axis_number: for name, data in self: fres = func(data, *args, **kwargs) @@ -1157,7 +1157,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: # only for axis==0 obj = self._obj_with_exclusions - result: Dict[Union[int, str], NDFrame] = {} + result: dict[int | str, NDFrame] = {} cannot_agg = [] for item in obj: data = obj[item] @@ -1224,7 +1224,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): def _wrap_applied_output_series( self, keys, - values: List[Series], + values: list[Series], not_indexed_same: bool, first_not_none, key_index, @@ -1633,8 +1633,8 @@ def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None: def _wrap_aggregated_output( self, - output: Mapping[base.OutputKey, Union[Series, np.ndarray]], - index: Optional[Index], + output: Mapping[base.OutputKey, Series | np.ndarray], + index: Index | None, ) -> DataFrame: """ Wraps the output of DataFrameGroupBy aggregations into the expected result. 
@@ -1667,7 +1667,7 @@ def _wrap_aggregated_output( return self._reindex_output(result) def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]] + self, output: Mapping[base.OutputKey, Series | np.ndarray] ) -> DataFrame: """ Wraps the output of DataFrameGroupBy transformations into the expected result. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5758762c13984..5b4e44f14dbdf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -498,8 +498,8 @@ def group_selection_context(groupby: BaseGroupBy) -> Iterator[BaseGroupBy]: class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): - _group_selection: Optional[IndexLabel] = None - _apply_allowlist: FrozenSet[str] = frozenset() + _group_selection: IndexLabel | None = None + _apply_allowlist: frozenset[str] = frozenset() _hidden_attrs = PandasObject._hidden_attrs | { "as_index", "axis", @@ -519,12 +519,12 @@ class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): def __init__( self, obj: FrameOrSeries, - keys: Optional[_KeysArgType] = None, + keys: _KeysArgType | None = None, axis: int = 0, - level: Optional[IndexLabel] = None, - grouper: Optional[ops.BaseGrouper] = None, - exclusions: Optional[Set[Hashable]] = None, - selection: Optional[IndexLabel] = None, + level: IndexLabel | None = None, + grouper: ops.BaseGrouper | None = None, + exclusions: set[Hashable] | None = None, + selection: IndexLabel | None = None, as_index: bool = True, sort: bool = True, group_keys: bool = True, @@ -592,7 +592,7 @@ def _assure_grouper(self) -> None: @final @property - def groups(self) -> Dict[Hashable, np.ndarray]: + def groups(self) -> dict[Hashable, np.ndarray]: """ Dict {group name -> group labels}. """ @@ -755,7 +755,7 @@ def _set_result_index_ordered( return result @final - def _dir_additions(self) -> Set[str]: + def _dir_additions(self) -> set[str]: return self.obj._dir_additions() | self._apply_allowlist def __getattr__(self, attr: str): @@ -793,7 +793,7 @@ def __getattr__(self, attr: str): @Appender(_pipe_template) def pipe( self, - func: Union[Callable[..., T], Tuple[Callable[..., T], str]], + func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs, ) -> T: @@ -866,7 +866,7 @@ def get_group(self, name, obj=None): return obj._take_with_is_copy(inds, axis=self.axis) - def __iter__(self) -> Iterator[Tuple[Hashable, FrameOrSeries]]: + def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]: """ Groupby iterator. 
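The BaseGroupBy hunks above show which names change and which do not: concrete containers (dict, list, tuple, set, frozenset, type) lose their typing capitals per PEP 585, while abstract types such as Mapping, Iterator, Sequence, Hashable, and Callable keep their imports and are left untouched. A sketch of the same split (imported from collections.abc here; the pandas files of this era pull them from typing):

from __future__ import annotations

from collections.abc import Iterator, Mapping


class GroupishBy:
    # Concrete container -> builtin spelling, as in `_apply_allowlist`.
    _apply_allowlist: frozenset[str] = frozenset({"mean", "sum"})

    def __init__(self, groups: dict[str, list[int]]) -> None:
        self._groups = groups

    @property
    def groups(self) -> Mapping[str, list[int]]:  # abstract type, unchanged
        return self._groups

    def __iter__(self) -> Iterator[tuple[str, list[int]]]:
        return iter(self._groups.items())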
@@ -994,7 +994,7 @@ def _cumcount_array(self, ascending: bool = True): def _cython_transform( self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs ): - output: Dict[base.OutputKey, np.ndarray] = {} + output: dict[base.OutputKey, np.ndarray] = {} for idx, obj in enumerate(self._iterate_slices()): name = obj.name @@ -1018,7 +1018,7 @@ def _cython_transform( return self._wrap_transformed_output(output) def _wrap_aggregated_output( - self, output: Mapping[base.OutputKey, np.ndarray], index: Optional[Index] + self, output: Mapping[base.OutputKey, np.ndarray], index: Index | None ): raise AbstractMethodError(self) @@ -1067,7 +1067,7 @@ def _agg_general( def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 ): - output: Dict[base.OutputKey, Union[np.ndarray, DatetimeArray]] = {} + output: dict[base.OutputKey, np.ndarray | DatetimeArray] = {} # Ideally we would be able to enumerate self._iterate_slices and use # the index from enumeration as the key of output, but ohlc in particular # returns a (n x 4) array. Output requires 1D ndarrays as values, so we @@ -1180,7 +1180,7 @@ def _python_agg_general(self, func, *args, **kwargs): f = lambda x: func(x, *args, **kwargs) # iterate through "columns" ex exclusions to populate output dict - output: Dict[base.OutputKey, np.ndarray] = {} + output: dict[base.OutputKey, np.ndarray] = {} for idx, obj in enumerate(self._iterate_slices()): name = obj.name @@ -1371,7 +1371,7 @@ class GroupBy(BaseGroupBy[FrameOrSeries]): @final @property - def _obj_1d_constructor(self) -> Type[Series]: + def _obj_1d_constructor(self) -> type[Series]: # GH28330 preserve subclassed Series/DataFrames if isinstance(self.obj, DataFrame): return self.obj._constructor_sliced @@ -1384,7 +1384,7 @@ def _bool_agg(self, val_test, skipna): Shared func to call any / all Cython GroupBy implementations. """ - def objs_to_bool(vals: np.ndarray) -> Tuple[np.ndarray, Type]: + def objs_to_bool(vals: np.ndarray) -> tuple[np.ndarray, type]: if is_object_dtype(vals): vals = np.array([bool(x) for x in vals]) else: @@ -1392,7 +1392,7 @@ def objs_to_bool(vals: np.ndarray) -> Tuple[np.ndarray, Type]: return vals.view(np.uint8), bool - def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray: + def result_to_bool(result: np.ndarray, inference: type) -> np.ndarray: return result.astype(inference, copy=False) return self._get_cythonized_result( @@ -2004,7 +2004,7 @@ def backfill(self, limit=None): @final @Substitution(name="groupby") @Substitution(see_also=_common_see_also) - def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFrame: + def nth(self, n: int | list[int], dropna: str | None = None) -> DataFrame: """ Take the nth row from each group if n is an int, or a subset of rows if n is a list of ints. @@ -2212,7 +2212,7 @@ def quantile(self, q=0.5, interpolation: str = "linear"): """ from pandas import concat - def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: + def pre_processor(vals: np.ndarray) -> tuple[np.ndarray, type | None]: if is_object_dtype(vals): raise TypeError( "'quantile' cannot be performed against 'object' dtypes!" 
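Several of these hunks (output: dict[base.OutputKey, np.ndarray] = {}) touch function-local annotations, and those are a special case: PEP 526 exempts local variable annotations from evaluation entirely, so the lowercase generics cost nothing there on any Python 3 even without the future import. A deliberately import-free illustration:

def transform_like(values):
    # PEP 526: local variable annotations are never evaluated at runtime,
    # so this runs on a bare Python 3.7 with no __future__ import in sight.
    output: dict[str, list[float]] = {}
    for value in values:
        output.setdefault("col", []).append(float(value))
    return output


print(transform_like([1, 2, 3]))  # {'col': [1.0, 2.0, 3.0]}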
@@ -2234,7 +2234,7 @@ def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: return vals, inference - def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: + def post_processor(vals: np.ndarray, inference: type | None) -> np.ndarray: if inference: # Check for edge case if not ( @@ -2558,7 +2558,7 @@ def _get_cythonized_result( needs_counts: bool = False, needs_values: bool = False, needs_2d: bool = False, - min_count: Optional[int] = None, + min_count: int | None = None, needs_mask: bool = False, needs_ngroups: bool = False, result_is_index: bool = False, @@ -2632,7 +2632,7 @@ def _get_cythonized_result( grouper = self.grouper labels, _, ngroups = grouper.group_info - output: Dict[base.OutputKey, np.ndarray] = {} + output: dict[base.OutputKey, np.ndarray] = {} base_func = getattr(libgroupby, how) error_msg = "" @@ -2947,10 +2947,10 @@ def _reindex_output( @final def sample( self, - n: Optional[int] = None, - frac: Optional[float] = None, + n: int | None = None, + frac: float | None = None, replace: bool = False, - weights: Optional[Union[Sequence, Series]] = None, + weights: Sequence | Series | None = None, random_state=None, ): """ @@ -3061,10 +3061,10 @@ def sample( @doc(GroupBy) def get_groupby( obj: NDFrame, - by: Optional[_KeysArgType] = None, + by: _KeysArgType | None = None, axis: int = 0, level=None, - grouper: Optional[ops.BaseGrouper] = None, + grouper: ops.BaseGrouper | None = None, exclusions=None, selection=None, as_index: bool = True, @@ -3076,7 +3076,7 @@ def get_groupby( dropna: bool = True, ) -> GroupBy: - klass: Type[GroupBy] + klass: type[GroupBy] if isinstance(obj, Series): from pandas.core.groupby.generic import SeriesGroupBy diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c7dc6d021a4c3..d9e48c7d2bcb1 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -236,7 +236,7 @@ class Grouper: Freq: 17T, dtype: int64 """ - _attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort") + _attributes: tuple[str, ...] = ("key", "level", "freq", "axis", "sort") def __new__(cls, *args, **kwargs): if kwargs.get("freq") is not None: @@ -416,7 +416,7 @@ def __init__( self, index: Index, grouper=None, - obj: Optional[FrameOrSeries] = None, + obj: FrameOrSeries | None = None, name=None, level=None, sort: bool = True, @@ -545,8 +545,8 @@ def __repr__(self) -> str: def __iter__(self): return iter(self.indices) - _codes: Optional[np.ndarray] = None - _group_index: Optional[Index] = None + _codes: np.ndarray | None = None + _group_index: Index | None = None @property def ngroups(self) -> int: @@ -604,7 +604,7 @@ def _make_codes(self) -> None: self._group_index = uniques @cache_readonly - def groups(self) -> Dict[Hashable, np.ndarray]: + def groups(self) -> dict[Hashable, np.ndarray]: return self.index.groupby(Categorical.from_codes(self.codes, self.group_index)) @@ -618,7 +618,7 @@ def get_grouper( mutated: bool = False, validate: bool = True, dropna: bool = True, -) -> Tuple[ops.BaseGrouper, Set[Hashable], FrameOrSeries]: +) -> tuple[ops.BaseGrouper, set[Hashable], FrameOrSeries]: """ Create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. 
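The pre_processor/post_processor pair in the quantile hunks threads an inferred dtype through the computation as type | None; lowercase type now subscripts and unions directly. A hedged sketch of that round-trip (helper names match the diff, bodies heavily simplified from the real dtype handling):

from __future__ import annotations

import numpy as np


def pre_processor(vals: np.ndarray) -> tuple[np.ndarray, type | None]:
    # Remember the dtype to restore ("inference") and lift ints to float so
    # interpolated quantiles can be fractional.
    inference: type | None = None
    if np.issubdtype(vals.dtype, np.integer):
        inference = np.int64
        vals = vals.astype(np.float64)
    return vals, inference


def post_processor(vals: np.ndarray, inference: type | None) -> np.ndarray:
    if inference is not None:
        vals = vals.astype(inference, copy=False)
    return vals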
@@ -742,8 +742,8 @@ def get_grouper( else: levels = [level] * len(keys) - groupings: List[Grouping] = [] - exclusions: Set[Hashable] = set() + groupings: list[Grouping] = [] + exclusions: set[Hashable] = set() # if the actual grouper should be obj[key] def is_in_axis(key) -> bool: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 1b1406fe9cd0f..1dfbdc54f5c03 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -104,14 +104,14 @@ def __init__( sort: bool = True, group_keys: bool = True, mutated: bool = False, - indexer: Optional[np.ndarray] = None, + indexer: np.ndarray | None = None, dropna: bool = True, ): assert isinstance(axis, Index), axis self._filter_empty_groups = self.compressed = len(groupings) != 1 self.axis = axis - self._groupings: List[grouper.Grouping] = list(groupings) + self._groupings: list[grouper.Grouping] = list(groupings) self.sort = sort self.group_keys = group_keys self.mutated = mutated @@ -119,7 +119,7 @@ def __init__( self.dropna = dropna @property - def groupings(self) -> List[grouper.Grouping]: + def groupings(self) -> list[grouper.Grouping]: return self._groupings @property @@ -135,7 +135,7 @@ def nkeys(self) -> int: def get_iterator( self, data: FrameOrSeries, axis: int = 0 - ) -> Iterator[Tuple[Hashable, FrameOrSeries]]: + ) -> Iterator[tuple[Hashable, FrameOrSeries]]: """ Groupby iterator @@ -253,15 +253,15 @@ def indices(self): return get_indexer_dict(codes_list, keys) @property - def codes(self) -> List[np.ndarray]: + def codes(self) -> list[np.ndarray]: return [ping.codes for ping in self.groupings] @property - def levels(self) -> List[Index]: + def levels(self) -> list[Index]: return [ping.group_index for ping in self.groupings] @property - def names(self) -> List[Hashable]: + def names(self) -> list[Hashable]: return [ping.name for ping in self.groupings] @final @@ -278,7 +278,7 @@ def size(self) -> Series: return Series(out, index=self.result_index, dtype="int64") @cache_readonly - def groups(self) -> Dict[Hashable, np.ndarray]: + def groups(self) -> dict[Hashable, np.ndarray]: """ dict {group name -> group labels} """ if len(self.groupings) == 1: return self.groupings[0].groups @@ -312,7 +312,7 @@ def codes_info(self) -> np.ndarray: return codes @final - def _get_compressed_codes(self) -> Tuple[np.ndarray, np.ndarray]: + def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]: all_codes = self.codes if len(all_codes) > 1: group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True) @@ -327,7 +327,7 @@ def ngroups(self) -> int: return len(self.result_index) @property - def reconstructed_codes(self) -> List[np.ndarray]: + def reconstructed_codes(self) -> list[np.ndarray]: codes = self.codes comp_ids, obs_ids, _ = self.group_info return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True) @@ -344,7 +344,7 @@ def result_index(self) -> Index: ) @final - def get_group_levels(self) -> List[Index]: + def get_group_levels(self) -> list[Index]: if not self.compressed and len(self.groupings) == 1: return [self.groupings[0].result_index] @@ -497,7 +497,7 @@ def _disallow_invalid_ops(self, values: ArrayLike, how: str): @final def _ea_wrap_cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs - ) -> Tuple[np.ndarray, Optional[List[str]]]: + ) -> tuple[np.ndarray, list[str] | None]: """ If we have an ExtensionArray, unwrap, call _cython_operation, and re-wrap if appropriate. 
@@ -890,7 +890,7 @@ def group_info(self): ) @cache_readonly - def reconstructed_codes(self) -> List[np.ndarray]: + def reconstructed_codes(self) -> list[np.ndarray]: # get unique result indices, and prepend 0 as groupby starts from the first return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]] @@ -902,15 +902,15 @@ def result_index(self): return self.binlabels @property - def levels(self) -> List[Index]: + def levels(self) -> list[Index]: return [self.binlabels] @property - def names(self) -> List[Hashable]: + def names(self) -> list[Hashable]: return [self.binlabels.name] @property - def groupings(self) -> List[grouper.Grouping]: + def groupings(self) -> list[grouper.Grouping]: return [ grouper.Grouping(lvl, lvl, in_axis=False, level=None, name=name) for lvl, name in zip(self.levels, self.names) @@ -1013,7 +1013,7 @@ def get_splitter( data: FrameOrSeries, labels: np.ndarray, ngroups: int, axis: int = 0 ) -> DataSplitter: if isinstance(data, Series): - klass: Type[DataSplitter] = SeriesSplitter + klass: type[DataSplitter] = SeriesSplitter else: # i.e. DataFrame klass = FrameSplitter diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 205bbcc07fc76..c1fe314842ba1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -141,7 +141,7 @@ _Identity = NewType("_Identity", object) -def disallow_kwargs(kwargs: Dict[str, Any]): +def disallow_kwargs(kwargs: dict[str, Any]): if kwargs: raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") @@ -215,7 +215,7 @@ class Index(IndexOpsMixin, PandasObject): """ # tolist is not actually deprecated, just suppressed in the __dir__ - _hidden_attrs: FrozenSet[str] = ( + _hidden_attrs: frozenset[str] = ( PandasObject._hidden_attrs | IndexOpsMixin._hidden_attrs | frozenset(["contains", "set_value"]) @@ -241,8 +241,8 @@ def _outer_indexer(self, left, right): return libjoin.outer_join_indexer(left, right) _typ = "index" - _data: Union[ExtensionArray, np.ndarray] - _id: Optional[_Identity] = None + _data: ExtensionArray | np.ndarray + _id: _Identity | None = None _name: Hashable = None # MultiIndex.levels previously allowed setting the index name. We # don't allow this anymore, and raise if it happens rather than @@ -655,7 +655,7 @@ def _engine(self): return self._engine_type(lambda: target_values, len(self)) @cache_readonly - def _dir_additions_for_owner(self) -> Set[str_t]: + def _dir_additions_for_owner(self) -> set[str_t]: """ Add the string-like labels to the owner dataframe/series dir output. @@ -902,10 +902,10 @@ def repeat(self, repeats, axis=None): def copy( self: _IndexT, - name: Optional[Hashable] = None, + name: Hashable | None = None, deep: bool = False, - dtype: Optional[Dtype] = None, - names: Optional[Sequence[Hashable]] = None, + dtype: Dtype | None = None, + names: Sequence[Hashable] | None = None, ) -> _IndexT: """ Make a copy of this object. @@ -1034,9 +1034,9 @@ def _mpl_repr(self): def format( self, name: bool = False, - formatter: Optional[Callable] = None, + formatter: Callable | None = None, na_rep: str_t = "NaN", - ) -> List[str_t]: + ) -> list[str_t]: """ Render a string representation of the Index. 
""" @@ -1054,8 +1054,8 @@ def format( return self._format_with_header(header, na_rep=na_rep) def _format_with_header( - self, header: List[str_t], na_rep: str_t = "NaN" - ) -> List[str_t]: + self, header: list[str_t], na_rep: str_t = "NaN" + ) -> list[str_t]: from pandas.io.formats.format import format_array values = self._values @@ -1325,7 +1325,7 @@ def name(self, value): @final def _validate_names( self, name=None, names=None, deep: bool = False - ) -> List[Hashable]: + ) -> list[Hashable]: """ Handles the quirks of having a singular 'name' parameter for general Index and plural 'names' parameter for MultiIndex. @@ -1720,7 +1720,7 @@ def droplevel(self, level=0): return self._drop_level_numbers(levnums) @final - def _drop_level_numbers(self, levnums: List[int]): + def _drop_level_numbers(self, levnums: list[int]): """ Drop MultiIndex levels by level _number_, not name. """ @@ -3460,7 +3460,7 @@ def _get_nearest_indexer(self, target: Index, limit, tolerance) -> np.ndarray: @final def _filter_indexer_tolerance( self, - target: Union[Index, np.ndarray, ExtensionArray], + target: Index | np.ndarray | ExtensionArray, indexer: np.ndarray, tolerance, ) -> np.ndarray: @@ -4250,7 +4250,7 @@ def array(self) -> ExtensionArray: return array @property - def _values(self) -> Union[ExtensionArray, np.ndarray]: + def _values(self) -> ExtensionArray | np.ndarray: """ The best array representation. @@ -4526,7 +4526,7 @@ def append(self, other): return self._concat(to_concat, name) - def _concat(self, to_concat: List[Index], name: Hashable) -> Index: + def _concat(self, to_concat: list[Index], name: Hashable) -> Index: """ Concatenate multiple Index objects. """ @@ -4771,7 +4771,7 @@ def sort_values( return_indexer: bool = False, ascending: bool = True, na_position: str_t = "last", - key: Optional[Callable] = None, + key: Callable | None = None, ): """ Return a sorted copy of the index. @@ -5416,10 +5416,10 @@ def _get_string_slice(self, key: str_t): def slice_indexer( self, - start: Optional[Hashable] = None, - end: Optional[Hashable] = None, - step: Optional[int] = None, - kind: Optional[str_t] = None, + start: Hashable | None = None, + end: Hashable | None = None, + step: int | None = None, + kind: str_t | None = None, ) -> slice: """ Compute the slice indexer for input labels and step. @@ -6028,7 +6028,7 @@ def ensure_index_from_sequences(sequences, names=None): def ensure_index( - index_like: Union[AnyArrayLike, Sequence], copy: bool = False + index_like: AnyArrayLike | Sequence, copy: bool = False ) -> Index: """ Ensure that we have an index from some index-like object. @@ -6123,7 +6123,7 @@ def ensure_has_len(seq): return seq -def trim_front(strings: List[str]) -> List[str]: +def trim_front(strings: list[str]) -> list[str]: """ Trims zeros and decimal points. @@ -6335,7 +6335,7 @@ def _try_convert_to_int_array( raise ValueError -def get_unanimous_names(*indexes: Index) -> Tuple[Hashable, ...]: +def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]: """ Return common name if all indices agree, otherwise None (level-by-level). 
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e9c77fb12d3ff..83f8f0a7201e9 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -229,8 +229,8 @@ class DatetimeIndex(DatetimeTimedeltaMixin): _is_numeric_dtype = False _data: DatetimeArray - inferred_freq: Optional[str] - tz: Optional[tzinfo] + inferred_freq: str | None + tz: tzinfo | None # -------------------------------------------------------------------- # methods that dispatch to DatetimeArray and wrap result @@ -289,7 +289,7 @@ def __new__( ambiguous="raise", dayfirst=False, yearfirst=False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy=False, name=None, ): @@ -398,7 +398,7 @@ def union_many(self, others): return this.rename(res_name) return this - def _maybe_utc_convert(self, other: Index) -> Tuple[DatetimeIndex, Index]: + def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]: this = self if isinstance(other, DatetimeIndex): @@ -1082,7 +1082,7 @@ def date_range( def bdate_range( start=None, end=None, - periods: Optional[int] = None, + periods: int | None = None, freq="B", tz=None, normalize=True, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 40413bfb40b4b..8af0fe4fdb0d5 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -196,7 +196,7 @@ def __new__( cls, data, closed=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy: bool = False, name=None, verify_integrity: bool = True, @@ -257,7 +257,7 @@ def from_breaks( closed: str = "right", name=None, copy: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ): with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_breaks( @@ -288,7 +288,7 @@ def from_arrays( closed: str = "right", name=None, copy: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ): with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_arrays( @@ -318,7 +318,7 @@ def from_tuples( closed: str = "right", name=None, copy: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ): with rewrite_exception("IntervalArray", cls.__name__): arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) @@ -581,8 +581,8 @@ def _searchsorted_monotonic(self, label, side, exclude_label=False): # Indexing Methods def get_loc( - self, key, method: Optional[str] = None, tolerance=None - ) -> Union[int, slice, np.ndarray]: + self, key, method: str | None = None, tolerance=None + ) -> int | slice | np.ndarray: """ Get integer location, slice or boolean mask for requested label. 
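get_loc's int | slice | np.ndarray shows PEP 604 covering wider unions than the Optional case. A toy positional lookup mirroring that three-way contract (hypothetical, far simpler than the IntervalIndex machinery):

from __future__ import annotations

import numpy as np


def get_loc_like(labels: list[str], key: str) -> int | slice | np.ndarray:
    # Unique hit -> int, contiguous run -> slice, scattered hits -> mask.
    mask = np.asarray([label == key for label in labels])
    positions = np.flatnonzero(mask)
    if len(positions) == 0:
        raise KeyError(key)
    if len(positions) == 1:
        return int(positions[0])
    if (np.diff(positions) == 1).all():
        return slice(int(positions[0]), int(positions[-1]) + 1)
    return mask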
@@ -650,9 +650,9 @@ def get_loc( def _get_indexer( self, target: Index, - method: Optional[str] = None, - limit: Optional[int] = None, - tolerance: Optional[Any] = None, + method: str | None = None, + limit: int | None = None, + tolerance: Any | None = None, ) -> np.ndarray: if isinstance(target, IntervalIndex): @@ -685,7 +685,7 @@ def _get_indexer( return ensure_platform_int(indexer) @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) - def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: + def get_indexer_non_unique(self, target: Index) -> tuple[np.ndarray, np.ndarray]: target = ensure_index(target) if isinstance(target, IntervalIndex) and not self._should_compare(target): @@ -704,7 +704,7 @@ def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray] return ensure_platform_int(indexer), ensure_platform_int(missing) - def _get_indexer_pointwise(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: + def _get_indexer_pointwise(self, target: Index) -> tuple[np.ndarray, np.ndarray]: """ pointwise implementation for get_indexer and get_indexer_non_unique. """ @@ -848,7 +848,7 @@ def insert(self, loc, item): # Rendering Methods # __repr__ associated methods are based on MultiIndex - def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: + def _format_with_header(self, header: list[str], na_rep: str = "NaN") -> list[str]: return header + list(self._format_native_types(na_rep=na_rep)) def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0d30c1665df34..0b94c40731d98 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -268,7 +268,7 @@ class MultiIndex(Index): _comparables = ["names"] rename = Index.set_names - sortorder: Optional[int] + sortorder: int | None # -------------------------------------------------------------------- # Constructors @@ -320,7 +320,7 @@ def __new__( return result - def _validate_codes(self, level: List, code: List): + def _validate_codes(self, level: list, code: list): """ Reassign code values as -1 if their corresponding levels are NaN. @@ -342,7 +342,7 @@ def _validate_codes(self, level: List, code: List): return code def _verify_integrity( - self, codes: Optional[List] = None, levels: Optional[List] = None + self, codes: list | None = None, levels: list | None = None ): """ Parameters @@ -476,8 +476,8 @@ def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex def from_tuples( cls, tuples, - sortorder: Optional[int] = None, - names: Optional[Sequence[Hashable]] = None, + sortorder: int | None = None, + names: Sequence[Hashable] | None = None, ): """ Convert list of tuples to MultiIndex. 
@@ -519,7 +519,7 @@ def from_tuples( elif is_iterator(tuples): tuples = list(tuples) - arrays: List[Sequence[Hashable]] + arrays: list[Sequence[Hashable]] if len(tuples) == 0: if names is None: raise TypeError("Cannot infer number of levels from empty list") @@ -1310,14 +1310,14 @@ def _format_native_types(self, na_rep="nan", **kwargs): def format( self, - name: Optional[bool] = None, - formatter: Optional[Callable] = None, - na_rep: Optional[str] = None, + name: bool | None = None, + formatter: Callable | None = None, + na_rep: str | None = None, names: bool = False, space: int = 2, sparsify=None, adjoin: bool = True, - ) -> List: + ) -> list: if name is not None: names = name @@ -2646,7 +2646,7 @@ def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None): return ensure_platform_int(indexer) def get_slice_bound( - self, label: Union[Hashable, Sequence[Hashable]], side: str, kind: str + self, label: Hashable | Sequence[Hashable], side: str, kind: str ) -> int: """ For an ordered MultiIndex, compute slice bound @@ -2971,7 +2971,7 @@ def get_loc_level(self, key, level=0, drop_level: bool = True): return self._get_loc_level(key, level=level, drop_level=drop_level) def _get_loc_level( - self, key, level: Union[int, List[int]] = 0, drop_level: bool = True + self, key, level: int | list[int] = 0, drop_level: bool = True ): """ get_loc_level but with `level` known to be positional, not name-based. @@ -3247,7 +3247,7 @@ def _convert_to_indexer(r) -> Int64Index: return Int64Index(r) def _update_indexer( - idxr: Optional[Index], indexer: Optional[Index], key + idxr: Index | None, indexer: Index | None, key ) -> Index: if indexer is None: indexer = Index(np.arange(n)) @@ -3270,7 +3270,7 @@ def _update_indexer( elif is_list_like(k): # a collection of labels to include from this level (these # are or'd) - indexers: Optional[Int64Index] = None + indexers: Int64Index | None = None for x in k: try: idxrs = _convert_to_indexer( @@ -3327,7 +3327,7 @@ def _update_indexer( def _reorder_indexer( self, - seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], + seq: tuple[Scalar | Iterable | AnyArrayLike, ...], indexer: Int64Index, ) -> Int64Index: """ @@ -3361,7 +3361,7 @@ def _reorder_indexer( return indexer n = len(self) - keys: Tuple[np.ndarray, ...] = () + keys: tuple[np.ndarray, ...] 
= () # For each level of the sequence in seq, map the level codes with the # order they appears in a list-like sequence # This mapping is then use to reorder the indexer @@ -3759,7 +3759,7 @@ def isin(self, values, level=None): __inv__ = make_invalid_op("__inv__") -def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int: +def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" int64_codes = [ensure_int64(level_codes) for level_codes in codes] for k in range(nlevels, 0, -1): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 879f2ddbcfcd1..833659497aff7 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -194,7 +194,7 @@ def __new__( data=None, ordinal=None, freq=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy=False, name=None, **fields, @@ -589,7 +589,7 @@ def memory_usage(self, deep: bool = False) -> int: def period_range( - start=None, end=None, periods: Optional[int] = None, freq=None, name=None + start=None, end=None, periods: int | None = None, freq=None, name=None ) -> PeriodIndex: """ Return a fixed frequency PeriodIndex. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0f73b62faf6e7..af681ba1419ee 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -90,7 +90,7 @@ def __new__( start=None, stop=None, step=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy=False, name=None, ): @@ -123,7 +123,7 @@ def __new__( @classmethod def from_range( - cls, data: range, name=None, dtype: Optional[Dtype] = None + cls, data: range, name=None, dtype: Dtype | None = None ) -> RangeIndex: """ Create RangeIndex from a range object. @@ -206,7 +206,7 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None - def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: + def _format_with_header(self, header: list[str], na_rep: str = "NaN") -> list[str]: if not len(self._range): return header first_val_str = str(self._range[0]) @@ -435,7 +435,7 @@ def _shallow_copy(self, values=None, name: Hashable = no_default): return result @doc(Int64Index.copy) - def copy(self, name=None, deep=False, dtype: Optional[Dtype] = None, names=None): + def copy(self, name=None, deep=False, dtype: Dtype | None = None, names=None): name = self._validate_names(name=name, names=names, deep=deep)[0] new_index = self._shallow_copy(name=name) @@ -491,8 +491,8 @@ def argsort(self, *args, **kwargs) -> np.ndarray: return np.arange(len(self) - 1, -1, -1) def factorize( - self, sort: bool = False, na_sentinel: Optional[int] = -1 - ) -> Tuple[np.ndarray, RangeIndex]: + self, sort: bool = False, na_sentinel: int | None = -1 + ) -> tuple[np.ndarray, RangeIndex]: codes = np.arange(len(self), dtype=np.intp) uniques = self if sort and self.step < 0: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ce7d5b511e811..7ddd26cda1a6c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -716,7 +716,7 @@ def _validate_key(self, key, axis: int): """ raise AbstractMethodError(self) - def _has_valid_tuple(self, key: Tuple): + def _has_valid_tuple(self, key: tuple): """ Check the key for valid keys across my indexer. 
""" @@ -730,7 +730,7 @@ def _has_valid_tuple(self, key: Tuple): f"[{self._valid_types}] types" ) from err - def _is_nested_tuple_indexer(self, tup: Tuple) -> bool: + def _is_nested_tuple_indexer(self, tup: tuple) -> bool: """ Returns ------- @@ -763,7 +763,7 @@ def _validate_key_length(self, key: Sequence[Any]) -> None: if len(key) > self.ndim: raise IndexingError("Too many indexers") - def _getitem_tuple_same_dim(self, tup: Tuple): + def _getitem_tuple_same_dim(self, tup: tuple): """ Index with indexers that should return an object of the same dimension as self.obj. @@ -782,7 +782,7 @@ def _getitem_tuple_same_dim(self, tup: Tuple): return retval - def _getitem_lowerdim(self, tup: Tuple): + def _getitem_lowerdim(self, tup: tuple): # we can directly get the axis result since the axis is specified if self.axis is not None: @@ -835,7 +835,7 @@ def _getitem_lowerdim(self, tup: Tuple): raise IndexingError("not applicable") - def _getitem_nested_tuple(self, tup: Tuple): + def _getitem_nested_tuple(self, tup: tuple): # we have a nested tuple so have at least 1 multi-index level # we should be able to match up the dimensionality here @@ -906,10 +906,10 @@ def __getitem__(self, key): maybe_callable = com.apply_if_callable(key, self.obj) return self._getitem_axis(maybe_callable, axis=axis) - def _is_scalar_access(self, key: Tuple): + def _is_scalar_access(self, key: tuple): raise NotImplementedError() - def _getitem_tuple(self, tup: Tuple): + def _getitem_tuple(self, tup: tuple): raise AbstractMethodError(self) def _getitem_axis(self, key, axis: int): @@ -950,7 +950,7 @@ def _validate_key(self, key, axis: int): def _has_valid_setitem_indexer(self, indexer) -> bool: return True - def _is_scalar_access(self, key: Tuple) -> bool: + def _is_scalar_access(self, key: tuple) -> bool: """ Returns ------- @@ -984,7 +984,7 @@ def _is_scalar_access(self, key: Tuple) -> bool: # ------------------------------------------------------------------- # MultiIndex Handling - def _multi_take_opportunity(self, tup: Tuple) -> bool: + def _multi_take_opportunity(self, tup: tuple) -> bool: """ Check whether there is the possibility to use ``_multi_take``. @@ -1008,7 +1008,7 @@ def _multi_take_opportunity(self, tup: Tuple) -> bool: # just too complicated return not any(com.is_bool_indexer(x) for x in tup) - def _multi_take(self, tup: Tuple): + def _multi_take(self, tup: tuple): """ Create the indexers for the passed tuple of keys, and executes the take operation. This allows the take operation to be @@ -1064,7 +1064,7 @@ def _getitem_iterable(self, key, axis: int): {axis: [keyarr, indexer]}, copy=True, allow_dups=True ) - def _getitem_tuple(self, tup: Tuple): + def _getitem_tuple(self, tup: tuple): with suppress(IndexingError): return self._getitem_lowerdim(tup) @@ -1081,7 +1081,7 @@ def _get_label(self, label, axis: int): # GH#5667 this will fail if the label is not present in the axis. 
return self.obj.xs(label, axis=axis) - def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): + def _handle_lowerdim_multi_index_axis0(self, tup: tuple): # we have an axis0 multi-index, handle or raise axis = self.axis or 0 try: @@ -1418,7 +1418,7 @@ def _has_valid_setitem_indexer(self, indexer) -> bool: return True - def _is_scalar_access(self, key: Tuple) -> bool: + def _is_scalar_access(self, key: tuple) -> bool: """ Returns ------- @@ -1455,7 +1455,7 @@ def _validate_integer(self, key: int, axis: int) -> None: # ------------------------------------------------------------------- - def _getitem_tuple(self, tup: Tuple): + def _getitem_tuple(self, tup: tuple): self._has_valid_tuple(tup) with suppress(IndexingError): @@ -2205,7 +2205,7 @@ def _convert_key(self, key, is_setter: bool = False): return key -def _tuplify(ndim: int, loc: Hashable) -> Tuple[Union[Hashable, slice], ...]: +def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]: """ Given an indexer for the first dimension, create an equivalent tuple for indexing over all dimensions. @@ -2219,7 +2219,7 @@ def _tuplify(ndim: int, loc: Hashable) -> Tuple[Union[Hashable, slice], ...]: ------- tuple """ - _tup: List[Union[Hashable, slice]] + _tup: list[Hashable | slice] _tup = [slice(None, None) for _ in range(ndim)] _tup[0] = loc return tuple(_tup) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 0f677ff3180be..801786b45ac9a 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -60,13 +60,13 @@ class ArrayManager(DataManager): "arrays", ] - arrays: List[Union[np.ndarray, ExtensionArray]] - _axes: List[Index] + arrays: list[np.ndarray | ExtensionArray] + _axes: list[Index] def __init__( self, - arrays: List[Union[np.ndarray, ExtensionArray]], - axes: List[Index], + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], do_integrity_check: bool = True, ): # Note: we are storing the axes in "_axes" in the (row, columns) order @@ -83,7 +83,7 @@ def make_empty(self: T, axes=None) -> T: if axes is None: axes = [self.axes[1:], Index([])] - arrays: List[Union[np.ndarray, ExtensionArray]] = [] + arrays: list[np.ndarray | ExtensionArray] = [] return type(self)(arrays, axes) @property @@ -91,19 +91,19 @@ def items(self) -> Index: return self._axes[1] @property - def axes(self) -> List[Index]: # type: ignore[override] + def axes(self) -> list[Index]: # type: ignore[override] # mypy doesn't work to override attribute with property # see https://github.com/python/mypy/issues/4125 """Axes is BlockManager-compatible order (columns, rows)""" return [self._axes[1], self._axes[0]] @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> tuple[int, ...]: # this still gives the BlockManager-compatible transposed shape return tuple(len(ax) for ax in self.axes) @property - def shape_proper(self) -> Tuple[int, ...]: + def shape_proper(self) -> tuple[int, ...]: # this returns (n_rows, n_columns) return tuple(len(ax) for ax in self._axes) @@ -172,7 +172,7 @@ def _verify_integrity(self) -> None: def reduce( self: T, func: Callable, ignore_failures: bool = False - ) -> Tuple[T, np.ndarray]: + ) -> tuple[T, np.ndarray]: # TODO this still fails because `func` assumes to work on 2D arrays # TODO implement ignore_failures assert self.ndim == 2 @@ -202,7 +202,7 @@ def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: def apply( self: T, f, - align_keys: Optional[List[str]] = None, + align_keys: list[str] | None = 
None, ignore_failures: bool = False, **kwargs, ) -> T: @@ -225,8 +225,8 @@ def apply( assert "filter" not in kwargs align_keys = align_keys or [] - result_arrays: List[np.ndarray] = [] - result_indices: List[int] = [] + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] # fillna: Series/DataFrame is responsible for making sure value is aligned aligned_args = {k: kwargs[k] for k in align_keys} @@ -265,7 +265,7 @@ def apply( result_arrays.append(applied) result_indices.append(i) - new_axes: List[Index] + new_axes: list[Index] if ignore_failures: # TODO copy? new_axes = [self._axes[0], self._axes[1][result_indices]] @@ -429,8 +429,8 @@ def replace(self, value, **kwargs) -> ArrayManager: def replace_list( self: T, - src_list: List[Any], - dest_list: List[Any], + src_list: list[Any], + dest_list: list[Any], inplace: bool = False, regex: bool = False, ) -> T: @@ -657,7 +657,7 @@ def idelete(self, indexer): self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]] self._axes = [self._axes[0], self._axes[1][to_keep]] - def iset(self, loc: Union[int, slice, np.ndarray], value): + def iset(self, loc: int | slice | np.ndarray, value): """ Set new item in-place. Does not consolidate. Adds new Block if not contained in the current set of items @@ -872,7 +872,7 @@ def unstack(self, unstacker, fill_value) -> ArrayManager: # quantile -def _interleaved_dtype(blocks) -> Optional[DtypeObj]: +def _interleaved_dtype(blocks) -> DtypeObj | None: """ Find the common dtype for `blocks`. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index eb8bb0fe90e9a..a782ec3463157 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -93,7 +93,7 @@ class Block(PandasObject): Index-ignorant; let the container take care of that """ - values: Union[np.ndarray, ExtensionArray] + values: np.ndarray | ExtensionArray __slots__ = ["_mgr_locs", "values", "ndim"] is_numeric = False @@ -239,7 +239,7 @@ def array_values(self) -> ExtensionArray: """ return PandasArray(self.values) - def get_values(self, dtype: Optional[Dtype] = None): + def get_values(self, dtype: Dtype | None = None): """ return an internal format, currently just the ndarray this is often overridden to handle to_dense like operations @@ -366,7 +366,7 @@ def delete(self, loc) -> None: self.values = np.delete(self.values, loc, 0) self.mgr_locs = self.mgr_locs.delete(loc) - def apply(self, func, **kwargs) -> List[Block]: + def apply(self, func, **kwargs) -> list[Block]: """ apply the function to my values; return a block if we are not one @@ -376,7 +376,7 @@ def apply(self, func, **kwargs) -> List[Block]: return self._split_op_result(result) - def reduce(self, func, ignore_failures: bool = False) -> List[Block]: + def reduce(self, func, ignore_failures: bool = False) -> list[Block]: # We will apply the function and reshape the result into a single-row # Block with the same mgr_locs; squeezing will be done at a higher level assert self.ndim == 2 @@ -397,7 +397,7 @@ def reduce(self, func, ignore_failures: bool = False) -> List[Block]: nb = self.make_block(res_values) return [nb] - def _split_op_result(self, result) -> List[Block]: + def _split_op_result(self, result) -> list[Block]: # See also: split_and_operate if is_extension_array_dtype(result) and result.ndim > 1: # TODO(EA2D): unnecessary with 2D EAs @@ -416,7 +416,7 @@ def _split_op_result(self, result) -> List[Block]: def fillna( self, value, limit=None, inplace: bool = False, downcast=None - ) -> List[Block]: + ) -> list[Block]: """ 
fillna on the block with the value. If we fail, then convert to ObjectBlock and try again @@ -457,7 +457,7 @@ def f(mask, val, idx): return self.split_and_operate(None, f, inplace) - def _split(self) -> List[Block]: + def _split(self) -> list[Block]: """ Split a block into a list of single-column blocks. """ @@ -473,7 +473,7 @@ def _split(self) -> List[Block]: def split_and_operate( self, mask, f, inplace: bool, ignore_failures: bool = False - ) -> List[Block]: + ) -> list[Block]: """ split the block per-column, and apply the callable f per-column, return a new block for each. Handle @@ -541,7 +541,7 @@ def make_a_block(nv, ref_loc): return new_blocks - def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: + def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]: # no need to downcast our float # unless indicated @@ -550,7 +550,7 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: return extend_blocks([b.downcast(downcast) for b in blocks]) - def downcast(self, dtypes=None) -> List[Block]: + def downcast(self, dtypes=None) -> list[Block]: """ try to downcast each item to the dict of dtypes if present """ # turn it off completely if dtypes is False: @@ -666,7 +666,7 @@ def convert( datetime: bool = True, numeric: bool = True, timedelta: bool = True, - ) -> List[Block]: + ) -> list[Block]: """ attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we are not an ObjectBlock @@ -724,7 +724,7 @@ def replace( value, inplace: bool = False, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: """ replace the to_replace value with value, possible to create new blocks here this is just a call to putmask. regex is not used here. @@ -769,7 +769,7 @@ def _replace_regex( inplace: bool = False, convert: bool = True, mask=None, - ) -> List[Block]: + ) -> list[Block]: """ Replace elements by the given value. @@ -809,11 +809,11 @@ def _replace_regex( def _replace_list( self, - src_list: List[Any], - dest_list: List[Any], + src_list: list[Any], + dest_list: list[Any], inplace: bool = False, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: """ See BlockManager._replace_list docstring. 
""" @@ -850,7 +850,7 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray: rb = [self if inplace else self.copy()] for i, (src, dest) in enumerate(pairs): - new_rb: List["Block"] = [] + new_rb: list["Block"] = [] for blk in rb: m = masks[i] convert = i == src_len # only convert once at the end @@ -987,7 +987,7 @@ def setitem(self, indexer, value): block = self.make_block(values) return block - def putmask(self, mask, new) -> List[Block]: + def putmask(self, mask, new) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1082,14 +1082,14 @@ def interpolate( self, method: str = "pad", axis: int = 0, - index: Optional[Index] = None, + index: Index | None = None, inplace: bool = False, - limit: Optional[int] = None, + limit: int | None = None, limit_direction: str = "forward", - limit_area: Optional[str] = None, - fill_value: Optional[Any] = None, + limit_area: str | None = None, + fill_value: Any | None = None, coerce: bool = False, - downcast: Optional[str] = None, + downcast: str | None = None, **kwargs, ): @@ -1141,10 +1141,10 @@ def _interpolate_with_fill( method: str = "pad", axis: int = 0, inplace: bool = False, - limit: Optional[int] = None, - limit_area: Optional[str] = None, - downcast: Optional[str] = None, - ) -> List[Block]: + limit: int | None = None, + limit_area: str | None = None, + downcast: str | None = None, + ) -> list[Block]: """ fillna but using the interpolate machinery """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -1167,15 +1167,15 @@ def _interpolate( self, method: str, index: Index, - fill_value: Optional[Any] = None, + fill_value: Any | None = None, axis: int = 0, - limit: Optional[int] = None, + limit: int | None = None, limit_direction: str = "forward", - limit_area: Optional[str] = None, + limit_area: str | None = None, inplace: bool = False, - downcast: Optional[str] = None, + downcast: str | None = None, **kwargs, - ) -> List[Block]: + ) -> list[Block]: """ interpolate using scipy wrappers """ inplace = validate_bool_kwarg(inplace, "inplace") data = self.values if inplace else self.values.copy() @@ -1251,7 +1251,7 @@ def take_nd(self, indexer, axis: int, new_mgr_locs=None, fill_value=lib.no_defau else: return self.make_block_same_class(new_values, new_mgr_locs) - def diff(self, n: int, axis: int = 1) -> List[Block]: + def diff(self, n: int, axis: int = 1) -> list[Block]: """ return block for the diff of the values """ new_values = algos.diff(self.values, n, axis=axis, stacklevel=7) return [self.make_block(values=new_values)] @@ -1266,7 +1266,7 @@ def shift(self, periods: int, axis: int = 0, fill_value=None): return [self.make_block(new_values)] - def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: + def where(self, other, cond, errors="raise", axis: int = 0) -> list[Block]: """ evaluate the block; return result block(s) from the result @@ -1328,7 +1328,7 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: cond = cond.swapaxes(axis, 0) mask = np.array([cond[i].all() for i in range(cond.shape[0])], dtype=bool) - result_blocks: List[Block] = [] + result_blocks: list[Block] = [] for m in [mask, ~mask]: if m.any(): result = cast(np.ndarray, result) # EABlock overrides where @@ -1432,7 +1432,7 @@ def _replace_coerce( mask: np.ndarray, inplace: bool = True, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: """ Replace value corresponding to the given boolean array with another value. 
@@ -1550,7 +1550,7 @@ def set_inplace(self, locs, values): assert locs.tolist() == [0] self.values = values - def putmask(self, mask, new) -> List[Block]: + def putmask(self, mask, new) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -1647,7 +1647,7 @@ def setitem(self, indexer, value): self.values[indexer] = value return self - def get_values(self, dtype: Optional[Dtype] = None): + def get_values(self, dtype: Dtype | None = None): # ExtensionArrays must be iterable, so this works. # TODO(EA2D): reshape not needed with 2D EAs return np.asarray(self.values).reshape(self.shape) @@ -1752,7 +1752,7 @@ def interpolate( placement=self.mgr_locs, ) - def diff(self, n: int, axis: int = 1) -> List[Block]: + def diff(self, n: int, axis: int = 1) -> list[Block]: if axis == 0 and n != 0: # n==0 case will be a no-op so let is fall through # Since we only have one column, the result will be all-NA. @@ -1767,7 +1767,7 @@ def diff(self, n: int, axis: int = 1) -> List[Block]: def shift( self, periods: int, axis: int = 0, fill_value: Any = None - ) -> List[ExtensionBlock]: + ) -> list[ExtensionBlock]: """ Shift the block by `periods`. @@ -1782,7 +1782,7 @@ def shift( ) ] - def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: + def where(self, other, cond, errors="raise", axis: int = 0) -> list[Block]: cond = _extract_bool_array(cond) assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame)) @@ -1947,7 +1947,7 @@ def _holder(self): def fill_value(self): return np.datetime64("NaT", "ns") - def get_values(self, dtype: Optional[Dtype] = None): + def get_values(self, dtype: Dtype | None = None): """ return object dtype as boxed values, such as Timestamps/Timedelta """ @@ -1968,7 +1968,7 @@ def iget(self, key): # TODO(EA2D): this can be removed if we ever have 2D EA return self.array_values().reshape(self.shape)[key] - def diff(self, n: int, axis: int = 0) -> List[Block]: + def diff(self, n: int, axis: int = 0) -> list[Block]: """ 1st discrete difference. @@ -2009,7 +2009,7 @@ def to_native_types(self, na_rep="NaT", **kwargs): result = arr._format_native_types(na_rep=na_rep, **kwargs) return self.make_block(result) - def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: + def where(self, other, cond, errors="raise", axis: int = 0) -> list[Block]: # TODO(EA2D): reshape unnecessary with 2D EAs arr = self.array_values().reshape(self.shape) @@ -2118,7 +2118,7 @@ def is_view(self) -> bool: # check the ndarray values of the DatetimeIndex values return self.values._data.base is not None - def get_values(self, dtype: Optional[Dtype] = None): + def get_values(self, dtype: Dtype | None = None): """ Returns an ndarray of values. @@ -2259,7 +2259,7 @@ def is_bool(self): """ return lib.is_bool_array(self.values.ravel("K")) - def reduce(self, func, ignore_failures: bool = False) -> List[Block]: + def reduce(self, func, ignore_failures: bool = False) -> list[Block]: """ For object-dtype, we operate column-wise. """ @@ -2295,7 +2295,7 @@ def convert( datetime: bool = True, numeric: bool = True, timedelta: bool = True, - ) -> List[Block]: + ) -> list[Block]: """ attempt to cast any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! 
@@ -2325,7 +2325,7 @@ def f(mask, val, idx): return blocks - def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: + def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]: if downcast is not None: return blocks @@ -2342,7 +2342,7 @@ def replace( value, inplace: bool = False, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: # Note: the checks we do in NDFrame.replace ensure we never get # here with listlike to_replace or value, as those cases # go through _replace_list @@ -2374,11 +2374,11 @@ class CategoricalBlock(ExtensionBlock): def _replace_list( self, - src_list: List[Any], - dest_list: List[Any], + src_list: list[Any], + dest_list: list[Any], inplace: bool = False, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: if len(algos.unique(dest_list)) == 1: # We likely got here by tiling value inside NDFrame.replace, # so un-tile here @@ -2391,7 +2391,7 @@ def replace( value, inplace: bool = False, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: inplace = validate_bool_kwarg(inplace, "inplace") result = self if inplace else self.copy() @@ -2403,7 +2403,7 @@ def replace( # Constructor Helpers -def get_block_type(values, dtype: Optional[Dtype] = None): +def get_block_type(values, dtype: Dtype | None = None): """ Find the appropriate Block subclass to use for the given values and dtype. @@ -2422,7 +2422,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): vtype = dtype.type kind = dtype.kind - cls: Type[Block] + cls: type[Block] if is_sparse(dtype): # Need this first(ish) so that Sparse[datetime] is sparse @@ -2450,7 +2450,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): return cls -def make_block(values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None): +def make_block(values, placement, klass=None, ndim=None, dtype: Dtype | None = None): # Ensure that we don't allow PandasArray / PandasDtype in internals. # For now, blocks should be backed by ndarrays when possible. if isinstance(values, ABCPandasArray): diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 9e4f535ebcbbe..f3e82ccab05f5 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -38,7 +38,7 @@ def concatenate_block_managers( - mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool + mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool ) -> Manager: """ Concatenate block managers into one. @@ -116,7 +116,7 @@ def concatenate_block_managers( return BlockManager(blocks, axes) -def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: Dict[int, np.ndarray]): +def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]): """ Construct concatenation plan for given block manager and indexers. @@ -329,7 +329,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: def _concatenate_join_units( - join_units: List[JoinUnit], concat_axis: int, copy: bool + join_units: list[JoinUnit], concat_axis: int, copy: bool ) -> ArrayLike: """ Concatenate values from several join units along selected axis. @@ -375,7 +375,7 @@ def _concatenate_join_units( return concat_values -def _get_empty_dtype_and_na(join_units: Sequence[JoinUnit]) -> Tuple[DtypeObj, Any]: +def _get_empty_dtype_and_na(join_units: Sequence[JoinUnit]) -> tuple[DtypeObj, Any]: """ Return dtype and N/A values to use when concatenating specified units. 
@@ -455,10 +455,10 @@ def _get_empty_dtype_and_na(join_units: Sequence[JoinUnit]) -> Tuple[DtypeObj, A def _get_upcast_classes( join_units: Sequence[JoinUnit], dtypes: Sequence[DtypeObj], -) -> Dict[str, List[DtypeObj]]: +) -> dict[str, list[DtypeObj]]: """Create mapping between upcast class names and lists of dtypes.""" - upcast_classes: Dict[str, List[DtypeObj]] = defaultdict(list) - null_upcast_classes: Dict[str, List[DtypeObj]] = defaultdict(list) + upcast_classes: dict[str, list[DtypeObj]] = defaultdict(list) + null_upcast_classes: dict[str, list[DtypeObj]] = defaultdict(list) for dtype, unit in zip(dtypes, join_units): if dtype is None: continue @@ -503,7 +503,7 @@ def _select_upcast_cls_from_dtype(dtype: DtypeObj) -> str: return "float" -def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool: +def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool: """ Check if the join units consist of blocks of uniform type that can be concatenated using Block.concat_same_type instead of the generic diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index f864f1cddfe7a..1c8472202e3e9 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -76,7 +76,7 @@ def arrays_to_mgr( arr_names, index, columns, - dtype: Optional[DtypeObj] = None, + dtype: DtypeObj | None = None, verify_integrity: bool = True, ): """ @@ -108,7 +108,7 @@ def arrays_to_mgr( def masked_rec_array_to_mgr( - data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool + data: MaskedRecords, index, columns, dtype: DtypeObj | None, copy: bool ): """ Extract from a masked rec array and create the manager. @@ -180,7 +180,7 @@ def mgr_to_mgr(mgr, typ: str): # DataFrame Constructor Interface -def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): +def init_ndarray(values, index, columns, dtype: DtypeObj | None, copy: bool): # input must be a ndarray, list, Series, index if isinstance(values, ABCSeries): @@ -262,12 +262,12 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): return create_block_manager_from_blocks(block_values, [columns, index]) -def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): +def init_dict(data: dict, index, columns, dtype: DtypeObj | None = None): """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. """ - arrays: Union[Sequence[Any], Series] + arrays: Sequence[Any] | Series if columns is not None: from pandas.core.series import Series @@ -311,9 +311,9 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): def nested_data_to_arrays( data: Sequence, - columns: Optional[Index], - index: Optional[Index], - dtype: Optional[DtypeObj], + columns: Index | None, + index: Index | None, + dtype: DtypeObj | None, ): """ Convert a single sequence of arrays to multiple arrays. 
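The `Optional[DtypeObj]` -> `DtypeObj | None` rewrites above use PEP 604 union syntax, which only gained runtime support in Python 3.10; postponed evaluation makes it legal on earlier interpreters because the expression is never executed. A sketch under that assumption (hypothetical function, not the pandas API):

    from __future__ import annotations

    import numpy as np


    def coerce(values: np.ndarray, dtype: np.dtype | None = None) -> np.ndarray:
        # `np.dtype | None` would need Python 3.10 if evaluated eagerly;
        # postponed evaluation keeps it a plain string on 3.7-3.9.
        if dtype is None:
            return values
        return values.astype(dtype)


    print(coerce(np.array([1, 2, 3]), np.dtype("float64")))  # [1. 2. 3.]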
@@ -388,7 +388,7 @@ def convert(v): return values -def _homogenize(data, index, dtype: Optional[DtypeObj]): +def _homogenize(data, index, dtype: DtypeObj | None): oindex = None homogenized = [] @@ -428,7 +428,7 @@ def extract_index(data) -> Index: index = Index([]) elif len(data) > 0: raw_lengths = [] - indexes: List[Union[List[Hashable], Index]] = [] + indexes: list[list[Hashable] | Index] = [] have_raw_arrays = False have_series = False @@ -496,7 +496,7 @@ def _get_names_from_index(data): if not has_some_name: return ibase.default_index(len(data)) - index: List[Hashable] = list(range(len(data))) + index: list[Hashable] = list(range(len(data))) count = 0 for i, s in enumerate(data): n = getattr(s, "name", None) @@ -510,8 +510,8 @@ def _get_axes( - N: int, K: int, index: Optional[Index], columns: Optional[Index] -) -> Tuple[Index, Index]: + N: int, K: int, index: Index | None, columns: Index | None +) -> tuple[Index, Index]: # helper to create the axes as indexes # return axes or defaults @@ -559,7 +559,7 @@ def dataclasses_to_dicts(data): # Conversion of Inputs to Arrays -def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): +def to_arrays(data, columns, dtype: DtypeObj | None = None): """ Return list of arrays, columns. """ @@ -610,9 +610,9 @@ def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): def _list_to_arrays( - data: List[Scalar], - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: + data: list[Scalar], + columns: Index | list, +) -> tuple[list[Scalar], Index | list[Axis]]: # Note: we already check len(data) > 0 before getting here if isinstance(data[0], tuple): content = lib.to_object_array_tuples(data) @@ -623,15 +623,15 @@ def _list_to_arrays( def _list_of_series_to_arrays( - data: List, - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: + data: list, + columns: Index | list, +) -> tuple[list[Scalar], Index | list[Axis]]: if columns is None: # We know pass_data is non-empty because data[0] is a Series pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))] columns = get_objs_combined_axis(pass_data, sort=False) - indexer_cache: Dict[int, Scalar] = {} + indexer_cache: dict[int, Scalar] = {} aligned_values = [] for s in data: @@ -653,9 +653,9 @@ def _list_of_dict_to_arrays( - data: List[Dict], - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: + data: list[dict], + columns: Index | list, +) -> tuple[list[Scalar], Index | list[Axis]]: """ Convert list of dicts to numpy arrays @@ -690,9 +690,9 @@ def _finalize_columns_and_data( content: np.ndarray, - columns: Optional[Union[Index, List]], - dtype: Optional[DtypeObj], -) -> Tuple[List[np.ndarray], Union[Index, List[Axis]]]: + columns: Index | list | None, + dtype: DtypeObj | None, +) -> tuple[list[np.ndarray], Index | list[Axis]]: """ Ensure we have valid columns, cast object dtypes if possible. """ @@ -710,8 +710,8 @@ def _validate_or_indexify_columns( - content: List, columns: Optional[Union[Index, List]] -) -> Union[Index, List[Axis]]: + content: list, columns: Index | list | None +) -> Index | list[Axis]: """ If columns is None, make numbers as column names; Otherwise, validate that columns have valid length.
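The BlockManager hunks that follow keep annotating methods as `def reduce(self: T, ...) -> tuple[T, np.ndarray]`; the rewrite only swaps `Tuple` for `tuple` and leaves the TypeVar-on-self pattern alone. A toy sketch of why that pattern is useful (illustrative classes, not the real managers):

    from typing import TypeVar

    T = TypeVar("T", bound="Manager")


    class Manager:  # toy stand-in for BlockManager's self-typed methods
        def copy_like(self: T) -> T:
            # a bound TypeVar on `self` lets subclasses keep their own
            # type through the method for type checkers
            return type(self)()


    class SingleManager(Manager):
        pass


    m = SingleManager().copy_like()
    print(type(m).__name__)  # SingleManager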
@@ -768,8 +768,8 @@ def _validate_or_indexify_columns( def _convert_object_array( - content: List[Scalar], dtype: Optional[DtypeObj] = None -) -> List[Scalar]: + content: list[Scalar], dtype: DtypeObj | None = None +) -> list[Scalar]: """ Internal function to convert object array. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f1cf1aa9a72cb..01a4061774924 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -130,7 +130,7 @@ def __init__( do_integrity_check: bool = True, ): self.axes = [ensure_index(ax) for ax in axes] - self.blocks: Tuple[Block, ...] = tuple(blocks) + self.blocks: tuple[Block, ...] = tuple(blocks) for block in blocks: if self.ndim != block.ndim: @@ -148,7 +148,7 @@ def __init__( self._blklocs = None @classmethod - def from_blocks(cls, blocks: List[Block], axes: List[Index]): + def from_blocks(cls, blocks: list[Block], axes: list[Index]): """ Constructor for BlockManager and SingleBlockManager with same signature. """ @@ -330,7 +330,7 @@ def _verify_integrity(self) -> None: def reduce( self: T, func: Callable, ignore_failures: bool = False - ) -> Tuple[T, np.ndarray]: + ) -> tuple[T, np.ndarray]: """ Apply reduction function blockwise, returning a single-row BlockManager. @@ -349,7 +349,7 @@ def reduce( # If 2D, we assume that we're operating column-wise assert self.ndim == 2 - res_blocks: List[Block] = [] + res_blocks: list[Block] = [] for blk in self.blocks: nbs = blk.reduce(func, ignore_failures) res_blocks.extend(nbs) @@ -376,7 +376,7 @@ def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: def apply( self: T, f, - align_keys: Optional[List[str]] = None, + align_keys: list[str] | None = None, ignore_failures: bool = False, **kwargs, ) -> T: @@ -399,7 +399,7 @@ def apply( assert "filter" not in kwargs align_keys = align_keys or [] - result_blocks: List[Block] = [] + result_blocks: list[Block] = [] # fillna: Series/DataFrame is responsible for making sure value is aligned aligned_args = {k: kwargs[k] for k in align_keys} @@ -640,8 +640,8 @@ def replace(self, to_replace, value, inplace: bool, regex: bool) -> BlockManager def replace_list( self: T, - src_list: List[Any], - dest_list: List[Any], + src_list: list[Any], + dest_list: list[Any], inplace: bool = False, regex: bool = False, ) -> T: @@ -737,7 +737,7 @@ def get_numeric_data(self, copy: bool = False) -> BlockManager: return self._combine([b for b in self.blocks if b.is_numeric], copy) def _combine( - self: T, blocks: List[Block], copy: bool = True, index: Optional[Index] = None + self: T, blocks: list[Block], copy: bool = True, index: Index | None = None ) -> T: """ return a new manager with the blocks """ if len(blocks) == 0: @@ -747,7 +747,7 @@ def _combine( indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) - new_blocks: List[Block] = [] + new_blocks: list[Block] = [] for b in blocks: b = b.copy(deep=copy) b.mgr_locs = inv_indexer[b.mgr_locs.indexer] @@ -812,7 +812,7 @@ def copy_func(ax): def as_array( self, transpose: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy: bool = False, na_value=lib.no_default, ) -> np.ndarray: @@ -869,7 +869,7 @@ def as_array( return arr.transpose() if transpose else arr def _interleave( - self, dtype: Optional[Dtype] = None, na_value=lib.no_default + self, dtype: Dtype | None = None, na_value=lib.no_default ) -> np.ndarray: """ Return ndarray from blocks with specified 
item order @@ -919,7 +919,7 @@ def to_dict(self, copy: bool = True): values : a dict of dtype -> BlockManager """ - bd: Dict[str, List[Block]] = {} + bd: dict[str, list[Block]] = {} for b in self.blocks: bd.setdefault(str(b.dtype), []).append(b) @@ -1043,7 +1043,7 @@ def idelete(self, indexer): ) self._rebuild_blknos_and_blklocs() - def iset(self, loc: Union[int, slice, np.ndarray], value): + def iset(self, loc: int | slice | np.ndarray, value): """ Set new item in-place. Does not consolidate. Adds new Block if not contained in the current set of items @@ -1125,7 +1125,7 @@ def value_getitem(placement): unfit_mgr_locs = np.concatenate(unfit_mgr_locs) unfit_count = len(unfit_mgr_locs) - new_blocks: List[Block] = [] + new_blocks: list[Block] = [] if value_is_extension_type: # This code (ab-)uses the fact that EA blocks contain only # one item. @@ -1482,8 +1482,8 @@ def unstack(self, unstacker, fill_value) -> BlockManager: new_columns = unstacker.get_new_columns(self.items) new_index = unstacker.new_index - new_blocks: List[Block] = [] - columns_mask: List[np.ndarray] = [] + new_blocks: list[Block] = [] + columns_mask: list[np.ndarray] = [] for blk in self.blocks: blk_cols = self.items[blk.mgr_locs.indexer] @@ -1534,7 +1534,7 @@ def __init__( self.blocks = (block,) @classmethod - def from_blocks(cls, blocks: List[Block], axes: List[Index]) -> SingleBlockManager: + def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> SingleBlockManager: """ Constructor for BlockManager and SingleBlockManager with same signature. """ @@ -1629,7 +1629,7 @@ def fast_xs(self, loc): # Constructor Helpers -def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager: +def create_block_manager_from_blocks(blocks, axes: list[Index]) -> BlockManager: try: if len(blocks) == 1 and not isinstance(blocks[0], Block): # if blocks[0] is of length 0, return empty blocks @@ -1656,7 +1656,7 @@ def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager: def create_block_manager_from_arrays( - arrays, names: Index, axes: List[Index] + arrays, names: Index, axes: list[Index] ) -> BlockManager: assert isinstance(names, Index) assert isinstance(axes, list) @@ -1698,10 +1698,10 @@ def construction_error(tot_items, block_shape, axes, e=None): # ----------------------------------------------------------------------- -def _form_blocks(arrays, names: Index, axes) -> List[Block]: +def _form_blocks(arrays, names: Index, axes) -> list[Block]: # put "leftover" items in float bucket, where else? # generalize? 
- items_dict: DefaultDict[str, List] = defaultdict(list) + items_dict: DefaultDict[str, list] = defaultdict(list) extra_locs = [] names_idx = names @@ -1722,7 +1722,7 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]: block_type = get_block_type(v) items_dict[block_type.__name__].append((i, k, v)) - blocks: List[Block] = [] + blocks: list[Block] = [] if len(items_dict["FloatBlock"]): float_blocks = _multi_blockify(items_dict["FloatBlock"]) blocks.extend(float_blocks) @@ -1786,7 +1786,7 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]: return blocks -def _simple_blockify(tuples, dtype) -> List[Block]: +def _simple_blockify(tuples, dtype) -> list[Block]: """ return a single array of a block that has a single dtype; if dtype is not None, coerce to this dtype @@ -1801,7 +1801,7 @@ def _simple_blockify(tuples, dtype) -> List[Block]: return [block] -def _multi_blockify(tuples, dtype: Optional[Dtype] = None): +def _multi_blockify(tuples, dtype: Dtype | None = None): """ return an array of blocks that potentially have different dtypes """ # group by dtype grouper = itertools.groupby(tuples, lambda x: x[2].dtype) @@ -1844,7 +1844,7 @@ def _shape_compat(x) -> Shape: return stacked, placement -def _interleaved_dtype(blocks: Sequence[Block]) -> Optional[DtypeObj]: +def _interleaved_dtype(blocks: Sequence[Block]) -> DtypeObj | None: """ Find the common dtype for `blocks`. @@ -1871,7 +1871,7 @@ def _consolidate(blocks): gkey = lambda x: x._consolidate_key grouper = itertools.groupby(sorted(blocks, key=gkey), gkey) - new_blocks: List[Block] = [] + new_blocks: list[Block] = [] for (_can_consolidate, dtype), group_blocks in grouper: merged_blocks = _merge_blocks( list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate @@ -1881,8 +1881,8 @@ def _consolidate(blocks): def _merge_blocks( - blocks: List[Block], dtype: DtypeObj, can_consolidate: bool -) -> List[Block]: + blocks: list[Block], dtype: DtypeObj, can_consolidate: bool +) -> list[Block]: if len(blocks) == 1: return blocks diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index 8250db3f5d888..5cb30642b6548 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -51,7 +51,7 @@ def operate_blockwise( # At this point we have already checked the parent DataFrames for # assert rframe._indexed_same(lframe) - res_blks: List[Block] = [] + res_blks: list[Block] = [] for lvals, rvals, locs, left_ea, right_ea, rblk in _iter_block_pairs(left, right): res_values = array_op(lvals, rvals) if left_ea and not right_ea and hasattr(res_values, "reshape"): @@ -79,7 +79,7 @@ def operate_blockwise( return new_mgr -def _reset_block_mgr_locs(nbs: List[Block], locs): +def _reset_block_mgr_locs(nbs: list[Block], locs): """ Reset mgr_locs to correspond to our original DataFrame. """ @@ -93,7 +93,7 @@ def _reset_block_mgr_locs(nbs: List[Block], locs): def _get_same_shape_values( lblk: Block, rblk: Block, left_ea: bool, right_ea: bool -) -> Tuple[ArrayLike, ArrayLike]: +) -> tuple[ArrayLike, ArrayLike]: """ Slice lblk.values to align with rblk. Squeeze if we have EAs. 
""" diff --git a/pandas/core/missing.py b/pandas/core/missing.py index d0ad38235d7e5..6db7fab3a6567 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -162,13 +162,13 @@ def find_valid_index(values, how: str): def interpolate_1d( xvalues: Index, yvalues: np.ndarray, - method: Optional[str] = "linear", - limit: Optional[int] = None, + method: str | None = "linear", + limit: int | None = None, limit_direction: str = "forward", - limit_area: Optional[str] = None, - fill_value: Optional[Any] = None, + limit_area: str | None = None, + fill_value: Any | None = None, bounds_error: bool = False, - order: Optional[int] = None, + order: int | None = None, **kwargs, ): """ @@ -233,7 +233,7 @@ def interpolate_1d( # are more than'limit' away from the prior non-NaN. # set preserve_nans based on direction using _interp_limit - preserve_nans: Union[List, Set] + preserve_nans: list | set if limit_direction == "forward": preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) elif limit_direction == "backward": @@ -529,7 +529,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat def _interpolate_with_limit_area( - values: ArrayLike, method: str, limit: Optional[int], limit_area: Optional[str] + values: ArrayLike, method: str, limit: int | None, limit_area: str | None ) -> ArrayLike: """ Apply interpolation and limit_area logic to values along a to-be-specified axis. @@ -577,8 +577,8 @@ def interpolate_2d( values, method: str = "pad", axis: Axis = 0, - limit: Optional[int] = None, - limit_area: Optional[str] = None, + limit: int | None = None, + limit_area: str | None = None, ): """ Perform an actual interpolation of values, values will be make 2-d if diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 11ce2a1a3b8a3..8626fd0bb02e8 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -59,7 +59,7 @@ # ----------------------------------------------------------------------------- # constants -ARITHMETIC_BINOPS: Set[str] = { +ARITHMETIC_BINOPS: set[str] = { "add", "sub", "mul", @@ -79,7 +79,7 @@ } -COMPARISON_BINOPS: Set[str] = {"eq", "ne", "lt", "gt", "le", "ge"} +COMPARISON_BINOPS: set[str] = {"eq", "ne", "lt", "gt", "le", "ge"} # ----------------------------------------------------------------------------- @@ -187,7 +187,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): def align_method_FRAME( - left, right, axis, flex: Optional[bool] = False, level: Level = None + left, right, axis, flex: bool | None = False, level: Level = None ): """ Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. 
diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 965de2e04bf40..a8188877a8fa7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -45,7 +45,7 @@ from pandas.tseries.frequencies import is_subperiod, is_superperiod from pandas.tseries.offsets import DateOffset, Day, Nano, Tick -_shared_docs_kwargs: Dict[str, str] = {} +_shared_docs_kwargs: dict[str, str] = {} class Resampler(BaseGroupBy, ShallowMixin): @@ -234,7 +234,7 @@ def _assure_grouper(self): @Appender(_pipe_template) def pipe( self, - func: Union[Callable[..., T], Tuple[Callable[..., T], str]], + func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs, ) -> T: @@ -1361,18 +1361,18 @@ class TimeGrouper(Grouper): def __init__( self, freq="Min", - closed: Optional[str] = None, - label: Optional[str] = None, + closed: str | None = None, + label: str | None = None, how="mean", axis=0, fill_method=None, limit=None, loffset=None, - kind: Optional[str] = None, - convention: Optional[str] = None, - base: Optional[int] = None, - origin: Union[str, TimestampConvertibleTypes] = "start_day", - offset: Optional[TimedeltaConvertibleTypes] = None, + kind: str | None = None, + convention: str | None = None, + base: int | None = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, **kwargs, ): # Check for correctness of the keyword arguments which would @@ -1851,7 +1851,7 @@ def _get_period_range_edges( def _insert_nat_bin( binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int -) -> Tuple[PeriodIndex, np.ndarray, PeriodIndex]: +) -> tuple[PeriodIndex, np.ndarray, PeriodIndex]: # NaT handling as in pandas._lib.lib.generate_bins_dt64() # shift bins by the number of NaT assert nat_count > 0 diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index f9bff603cec38..85e6fdc238b34 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -52,7 +52,7 @@ @overload def concat( - objs: Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]], + objs: Iterable[DataFrame] | Mapping[Hashable, DataFrame], axis=0, join: str = "outer", ignore_index: bool = False, @@ -68,7 +68,7 @@ def concat( @overload def concat( - objs: Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]], + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], axis=0, join: str = "outer", ignore_index: bool = False, @@ -83,7 +83,7 @@ def concat( def concat( - objs: Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]], + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], axis=0, join="outer", ignore_index: bool = False, @@ -308,7 +308,7 @@ class _Concatenator: def __init__( self, - objs: Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]], + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], axis=0, join: str = "outer", keys=None, @@ -377,7 +377,7 @@ def __init__( # get the sample # want the highest ndim that we have, and must be non-empty # unless all objs are empty - sample: Optional[NDFrame] = None + sample: NDFrame | None = None if len(ndims) > 1: max_ndim = max(ndims) for obj in objs: @@ -468,7 +468,7 @@ def __init__( self.new_axes = self._get_new_axes() def get_result(self): - cons: Type[FrameOrSeriesUnion] + cons: type[FrameOrSeriesUnion] sample: FrameOrSeriesUnion # series only @@ -533,7 +533,7 @@ def _get_result_dim(self) -> int: else: return self.objs[0].ndim - def _get_new_axes(self) -> List[Index]: + def _get_new_axes(self) -> list[Index]: ndim = self._get_result_dim() return [ self._get_concat_axis if i 
== self.bm_axis else self._get_comb_axis(i) @@ -562,7 +562,7 @@ def _get_concat_axis(self) -> Index: idx = ibase.default_index(len(self.objs)) return idx elif self.keys is None: - names: List[Hashable] = [None] * len(self.objs) + names: list[Hashable] = [None] * len(self.objs) num = 0 has_names = False for i, x in enumerate(self.objs): diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index b5f8b2d02207b..8b911e097bfc4 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -471,7 +471,7 @@ def wide_to_long( two 2.9 """ - def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]: + def get_var_names(df, stub: str, sep: str, suffix: str) -> list[str]: regex = fr"^{re.escape(stub)}{re.escape(sep)}{suffix}$" pattern = re.compile(regex) return [col for col in df.columns if pattern.match(col)] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a3eef92bacfad..446104e4bf617 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -67,16 +67,16 @@ def merge( left: FrameOrSeriesUnion, right: FrameOrSeriesUnion, how: str = "inner", - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, sort: bool = False, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, indicator: bool = False, - validate: Optional[str] = None, + validate: str | None = None, ) -> DataFrame: op = _MergeOperation( left, @@ -117,7 +117,7 @@ def _groupby_and_merge(by, on, left: DataFrame, right: DataFrame, merge_pieces): by = [by] lby = left.groupby(by, sort=False) - rby: Optional[groupby.DataFrameGroupBy] = None + rby: groupby.DataFrameGroupBy | None = None # if we can groupby the rhs # then we can get vastly better perf @@ -160,12 +160,12 @@ def _groupby_and_merge(by, on, left: DataFrame, right: DataFrame, merge_pieces): def merge_ordered( left: DataFrame, right: DataFrame, - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_by=None, right_by=None, - fill_method: Optional[str] = None, + fill_method: str | None = None, suffixes: Suffixes = ("_x", "_y"), how: str = "outer", ) -> DataFrame: @@ -303,9 +303,9 @@ def _merger(x, y): def merge_asof( left: DataFrame, right: DataFrame, - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, by=None, @@ -590,9 +590,9 @@ def __init__( left: FrameOrSeriesUnion, right: FrameOrSeriesUnion, how: str = "inner", - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, axis: int = 1, left_index: bool = False, right_index: bool = False, @@ -600,7 +600,7 @@ def __init__( suffixes: Suffixes = ("_x", "_y"), copy: bool = True, indicator: bool = False, - validate: Optional[str] = None, + validate: str | None = None, ): _left = _validate_operand(left) _right = _validate_operand(right) @@ -626,7 +626,7 @@ def __init__( self.indicator = 
indicator - self.indicator_name: Optional[str] + self.indicator_name: str | None if isinstance(self.indicator, str): self.indicator_name = self.indicator elif isinstance(self.indicator, bool): @@ -718,13 +718,13 @@ def get_result(self): return result.__finalize__(self, method="merge") - def _maybe_drop_cross_column(self, result: DataFrame, cross_col: Optional[str]): + def _maybe_drop_cross_column(self, result: DataFrame, cross_col: str | None): if cross_col is not None: result.drop(columns=cross_col, inplace=True) def _indicator_pre_merge( self, left: DataFrame, right: DataFrame - ) -> Tuple[DataFrame, DataFrame]: + ) -> tuple[DataFrame, DataFrame]: columns = left.columns.union(right.columns) @@ -1232,7 +1232,7 @@ def _maybe_coerce_merge_keys(self): def _create_cross_configuration( self, left: DataFrame, right: DataFrame - ) -> Tuple[DataFrame, DataFrame, str, str]: + ) -> tuple[DataFrame, DataFrame, str, str]: """ Creates the configuration to dispatch the cross operation to inner join, e.g. adding a join column and resetting parameters. Join column is added @@ -1549,15 +1549,15 @@ def __init__( self, left: DataFrame, right: DataFrame, - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, axis: int = 1, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, - fill_method: Optional[str] = None, + fill_method: str | None = None, how: str = "outer", ): @@ -1643,9 +1643,9 @@ def __init__( self, left: DataFrame, right: DataFrame, - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, by=None, @@ -1654,7 +1654,7 @@ def __init__( axis: int = 1, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, - fill_method: Optional[str] = None, + fill_method: str | None = None, how: str = "asof", tolerance=None, allow_exact_matches: bool = True, @@ -1980,7 +1980,7 @@ def _left_join_on_index(left_ax: Index, right_ax: Index, join_keys, sort: bool = def _factorize_keys( lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner" -) -> Tuple[np.ndarray, np.ndarray, int]: +) -> tuple[np.ndarray, np.ndarray, int]: """ Encode left and right keys as enumerated types. diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 7ac98d7fcbd33..29e8e70ad7ab8 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -55,7 +55,7 @@ def pivot_table( columns = _convert_by(columns) if isinstance(aggfunc, list): - pieces: List[DataFrame] = [] + pieces: list[DataFrame] = [] keys = [] for func in aggfunc: table = pivot_table( @@ -230,7 +230,7 @@ def _add_margins( if margins_name in table.columns.get_level_values(level): raise ValueError(msg) - key: Union[str, Tuple[str, ...]] + key: str | tuple[str, ...] 
if len(rows) > 1: key = (margins_name,) + ("",) * (len(rows) - 1) else: @@ -374,7 +374,7 @@ def _generate_marginal_results_without_values( ): if len(cols) > 0: # need to "interleave" the margins - margin_keys: Union[List, Index] = [] + margin_keys: list | Index = [] def _all_key(): if len(cols) == 1: @@ -425,9 +425,9 @@ def _convert_by(by): @Appender(_shared_docs["pivot"], indents=1) def pivot( data: DataFrame, - index: Optional[IndexLabel] = None, - columns: Optional[IndexLabel] = None, - values: Optional[IndexLabel] = None, + index: IndexLabel | None = None, + columns: IndexLabel | None = None, + values: IndexLabel | None = None, ) -> DataFrame: if columns is None: raise TypeError("pivot() missing 1 required argument: 'columns'") @@ -648,7 +648,7 @@ def _normalize(table, normalize, margins: bool, margins_name="All"): if margins is False: # Actual Normalizations - normalizers: Dict[Union[bool, str], Callable] = { + normalizers: dict[bool | str, Callable] = { "all": lambda x: x / x.sum(axis=1).sum(axis=0), "columns": lambda x: x / x.sum(), "index": lambda x: x.div(x.sum(axis=1), axis=0), @@ -734,8 +734,8 @@ def _get_names(arrs, names, prefix: str = "row"): def _build_names_mapper( - rownames: List[str], colnames: List[str] -) -> Tuple[Dict[str, str], List[str], Dict[str, str], List[str]]: + rownames: list[str], colnames: list[str] +) -> tuple[dict[str, str], list[str], dict[str, str], list[str]]: """ Given the names of a DataFrame's rows and columns, returns a set of unique row and column names and mappers that convert to original names. @@ -764,7 +764,7 @@ def _build_names_mapper( """ def get_duplicates(names): - seen: Set = set() + seen: set = set() return {name for name in names if name not in seen} shared_names = set(rownames).intersection(set(colnames)) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d389f19598d14..cc74edd7ae1b1 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -739,7 +739,7 @@ def get_dummies( columns=None, sparse: bool = False, drop_first: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ) -> DataFrame: """ Convert categorical variable into dummy/indicator variables. 
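The `_normalize` hunk above also shows PEP 604 unions nested inside a subscript: the dispatch table's key type becomes `bool | str`. A simplified sketch of that table shape; the lambdas are stand-ins, not pandas' normalization logic:

    from __future__ import annotations

    from typing import Callable

    normalizers: dict[bool | str, Callable] = {
        "all": lambda xs: [x / sum(xs) for x in xs],
        "index": lambda xs: xs,  # placeholder, not pandas' row-wise logic
        True: lambda xs: [x / sum(xs) for x in xs],  # normalize=True maps to "all"
    }

    print(normalizers["all"]([1.0, 1.0, 2.0]))  # [0.25, 0.25, 0.5]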
@@ -878,7 +878,7 @@ def check_len(item, name): elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] - with_dummies: List[DataFrame] + with_dummies: list[DataFrame] if data_to_encode.shape == data.shape: # Encoding the entire df, do not prepend any dropped columns with_dummies = [] @@ -924,7 +924,7 @@ def _get_dummies_1d( dummy_na=False, sparse=False, drop_first=False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ): from pandas.core.reshape.concat import concat @@ -965,7 +965,7 @@ def get_empty_frame(data) -> DataFrame: else: dummy_cols = [f"{prefix}{prefix_sep}{level}" for level in levels] - index: Optional[Index] + index: Index | None if isinstance(data, Series): index = data.index else: @@ -973,7 +973,7 @@ def get_empty_frame(data) -> DataFrame: if sparse: - fill_value: Union[bool, float, int] + fill_value: bool | float | int if is_integer_dtype(dtype): fill_value = 0 elif dtype == bool: @@ -983,7 +983,7 @@ def get_empty_frame(data) -> DataFrame: sparse_series = [] N = len(data) - sp_indices: List[List] = [[] for _ in range(len(dummy_cols))] + sp_indices: list[list] = [[] for _ in range(len(dummy_cols))] mask = codes != -1 codes = codes[mask] n_idx = np.arange(N)[mask] diff --git a/pandas/core/series.py b/pandas/core/series.py index f75292f32dbca..2bc87d8922ae1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -196,7 +196,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray) _name: Hashable - _metadata: List[str] = ["name"] + _metadata: list[str] = ["name"] _internal_names_set = {"index"} | generic.NDFrame._internal_names_set _accessors = {"dt", "cat", "str", "sparse"} _hidden_attrs = ( @@ -221,7 +221,7 @@ def __init__( self, data=None, index=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, name=None, copy: bool = False, fastpath: bool = False, @@ -347,7 +347,7 @@ def __init__( self.name = name self._set_axis(0, index, fastpath=True) - def _init_dict(self, data, index=None, dtype: Optional[Dtype] = None): + def _init_dict(self, data, index=None, dtype: Dtype | None = None): """ Derive the "_mgr" and "index" attributes of a new Series from a dictionary input. @@ -398,11 +398,11 @@ def _init_dict(self, data, index=None, dtype: Optional[Dtype] = None): # ---------------------------------------------------------------------- @property - def _constructor(self) -> Type[Series]: + def _constructor(self) -> type[Series]: return Series @property - def _constructor_expanddim(self) -> Type[DataFrame]: + def _constructor_expanddim(self) -> type[DataFrame]: from pandas.core.frame import DataFrame return DataFrame @@ -622,7 +622,7 @@ def __len__(self) -> int: """ return len(self._mgr) - def view(self, dtype: Optional[Dtype] = None) -> Series: + def view(self, dtype: Dtype | None = None) -> Series: """ Create a new view of the Series. @@ -696,7 +696,7 @@ def view(self, dtype: Optional[Dtype] = None) -> Series: # NDArray Compat _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray) - def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: """ Return the values as a NumPy array. @@ -757,7 +757,7 @@ def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: # indexers @property - def axes(self) -> List[Index]: + def axes(self) -> list[Index]: """ Return a list of the row axis labels. 
""" @@ -1383,12 +1383,12 @@ def to_string( ) def to_markdown( self, - buf: Optional[IO[str]] = None, + buf: IO[str] | None = None, mode: str = "wt", index: bool = True, storage_options: StorageOptions = None, **kwargs, - ) -> Optional[str]: + ) -> str | None: """ Print {klass} in Markdown-friendly format. @@ -1453,7 +1453,7 @@ def to_markdown( # ---------------------------------------------------------------------- - def items(self) -> Iterable[Tuple[Hashable, Any]]: + def items(self) -> Iterable[tuple[Hashable, Any]]: """ Lazily iterate over (index, value) tuples. @@ -1483,7 +1483,7 @@ def items(self) -> Iterable[Tuple[Hashable, Any]]: return zip(iter(self.index), iter(self)) @Appender(items.__doc__) - def iteritems(self) -> Iterable[Tuple[Hashable, Any]]: + def iteritems(self) -> Iterable[tuple[Hashable, Any]]: return self.items() # ---------------------------------------------------------------------- @@ -1847,7 +1847,7 @@ def unique(self): """ return super().unique() - def drop_duplicates(self, keep="first", inplace=False) -> Optional[Series]: + def drop_duplicates(self, keep="first", inplace=False) -> Series | None: """ Return Series with duplicate values removed. @@ -2310,8 +2310,8 @@ def corr(self, other, method="pearson", min_periods=None) -> float: def cov( self, other: Series, - min_periods: Optional[int] = None, - ddof: Optional[int] = 1, + min_periods: int | None = None, + ddof: int | None = 1, ) -> float: """ Compute covariance with Series, excluding missing values. @@ -2704,8 +2704,8 @@ def _binop(self, other, func, level=None, fill_value=None): return this._construct_result(result, name) def _construct_result( - self, result: Union[ArrayLike, Tuple[ArrayLike, ArrayLike]], name: Hashable - ) -> Union[Series, Tuple[Series, Series]]: + self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable + ) -> Series | tuple[Series, Series]: """ Construct an appropriately-labelled Series from the result of an op. @@ -3980,7 +3980,7 @@ def apply( self, func: AggFuncType, convert_dtype: bool = True, - args: Tuple[Any, ...] = (), + args: tuple[Any, ...] 
= (), **kwds, ) -> FrameOrSeriesUnion: """ @@ -4395,7 +4395,7 @@ def fillna( inplace=False, limit=None, downcast=None, - ) -> Optional[Series]: + ) -> Series | None: return super().fillna( value=value, method=method, @@ -4828,7 +4828,7 @@ def asfreq( self, freq, method=None, - how: Optional[str] = None, + how: str | None = None, normalize: bool = False, fill_value=None, ) -> Series: @@ -4845,16 +4845,16 @@ def resample( self, rule, axis=0, - closed: Optional[str] = None, - label: Optional[str] = None, + closed: str | None = None, + label: str | None = None, convention: str = "start", - kind: Optional[str] = None, + kind: str | None = None, loffset=None, - base: Optional[int] = None, + base: int | None = None, on=None, level=None, - origin: Union[str, TimestampConvertibleTypes] = "start_day", - offset: Optional[TimedeltaConvertibleTypes] = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, ) -> Resampler: return super().resample( rule=rule, diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2c2e0c16a4482..a106aa3684287 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -40,13 +40,13 @@ def get_indexer_indexer( target: Index, - level: Union[str, int, List[str], List[int]], + level: str | int | list[str] | list[int], ascending: bool, kind: str, na_position: str, sort_remaining: bool, key: IndexKeyFunc, -) -> Optional[np.array]: +) -> np.array | None: """ Helper method that return the indexer according to input parameters for the sort_index method of DataFrame and Series. @@ -263,7 +263,7 @@ def indexer_from_factorized(labels, shape, compress: bool = True): def lexsort_indexer( - keys, orders=None, na_position: str = "last", key: Optional[Callable] = None + keys, orders=None, na_position: str = "last", key: Callable | None = None ): """ Performs lexical sorting on a set of keys @@ -330,8 +330,8 @@ def nargsort( kind: str = "quicksort", ascending: bool = True, na_position: str = "last", - key: Optional[Callable] = None, - mask: Optional[np.ndarray] = None, + key: Callable | None = None, + mask: np.ndarray | None = None, ): """ Intended to be a drop-in replacement for np.argsort which handles NaNs. @@ -468,7 +468,7 @@ def _ensure_key_mapped_multiindex( return type(index).from_arrays(mapped) -def ensure_key_mapped(values, key: Optional[Callable], levels=None): +def ensure_key_mapped(values, key: Callable | None, levels=None): """ Applies a callable key function to the values function and checks that the resulting value has the same shape. 
Can be called on Index @@ -517,10 +517,10 @@ def get_flattened_list( ngroups: int, levels: Iterable[Index], labels: Iterable[np.ndarray], -) -> List[Tuple]: +) -> list[tuple]: """Map compressed group id -> key tuple.""" comp_ids = comp_ids.astype(np.int64, copy=False) - arrays: DefaultDict[int, List[int]] = defaultdict(list) + arrays: DefaultDict[int, list[int]] = defaultdict(list) for labs, level in zip(labels, levels): table = hashtable.Int64HashTable(ngroups) table.map(comp_ids, labs.astype(np.int64, copy=False)) @@ -530,8 +530,8 @@ def get_flattened_list( def get_indexer_dict( - label_list: List[np.ndarray], keys: List[Index] -) -> Dict[Union[str, Tuple], np.ndarray]: + label_list: list[np.ndarray], keys: list[Index] +) -> dict[str | tuple, np.ndarray]: """ Returns ------- diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index b0df626da973a..e6a90948891a1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -88,7 +88,7 @@ def _guess_datetime_format_for_array(arr, **kwargs): def should_cache( - arg: ArrayConvertible, unique_share: float = 0.7, check_count: Optional[int] = None + arg: ArrayConvertible, unique_share: float = 0.7, check_count: int | None = None ) -> bool: """ Decides whether to do caching. @@ -145,7 +145,7 @@ def should_cache( def _maybe_cache( arg: ArrayConvertible, - format: Optional[str], + format: str | None, cache: bool, convert_listlike: Callable, ) -> Series: @@ -184,7 +184,7 @@ def _maybe_cache( def _box_as_indexlike( - dt_array: ArrayLike, utc: Optional[bool] = None, name: Hashable = None + dt_array: ArrayLike, utc: bool | None = None, name: Hashable = None ) -> Index: """ Properly boxes the ndarray of datetimes to DatetimeIndex @@ -215,7 +215,7 @@ def _box_as_indexlike( def _convert_and_box_cache( arg: DatetimeScalarOrArrayConvertible, cache_array: Series, - name: Optional[str] = None, + name: str | None = None, ) -> Index: """ Convert array of dates with a cache and wrap the result in an Index. @@ -269,15 +269,15 @@ def _return_parsed_timezone_results(result, timezones, tz, name): def _convert_listlike_datetimes( arg, - format: Optional[str], + format: str | None, name: Hashable = None, - tz: Optional[Timezone] = None, - unit: Optional[str] = None, - errors: Optional[str] = None, - infer_datetime_format: Optional[bool] = None, - dayfirst: Optional[bool] = None, - yearfirst: Optional[bool] = None, - exact: Optional[bool] = None, + tz: Timezone | None = None, + unit: str | None = None, + errors: str | None = None, + infer_datetime_format: bool | None = None, + dayfirst: bool | None = None, + yearfirst: bool | None = None, + exact: bool | None = None, ): """ Helper function for to_datetime. Performs the conversions of 1D listlike @@ -564,14 +564,14 @@ def to_datetime( errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ..., - utc: Optional[bool] = ..., - format: Optional[str] = ..., + utc: bool | None = ..., + format: str | None = ..., exact: bool = ..., - unit: Optional[str] = ..., + unit: str | None = ..., infer_datetime_format: bool = ..., origin=..., cache: bool = ..., -) -> Union[DatetimeScalar, NaTType]: +) -> DatetimeScalar | NaTType: ... 
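The `to_datetime` hunks here modernize a set of `typing.overload` stubs: scalar input maps to a scalar result, listlike input to a list-shaped result. A runnable sketch of the same pattern (hypothetical `parse` helper, not `to_datetime` itself):

    from __future__ import annotations

    from datetime import datetime
    from typing import overload


    @overload
    def parse(arg: str) -> datetime: ...
    @overload
    def parse(arg: list | tuple) -> list[datetime]: ...


    def parse(arg):
        # the stubs above exist only for type checkers; this single
        # implementation is what actually runs
        if isinstance(arg, (list, tuple)):
            return [datetime.fromisoformat(a) for a in arg]
        return datetime.fromisoformat(arg)


    print(parse("2021-02-01"))
    print(parse(["2021-02-01", "2021-02-02"]))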
@@ -581,10 +581,10 @@ def to_datetime( errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ..., - utc: Optional[bool] = ..., - format: Optional[str] = ..., + utc: bool | None = ..., + format: str | None = ..., exact: bool = ..., - unit: Optional[str] = ..., + unit: str | None = ..., infer_datetime_format: bool = ..., origin=..., cache: bool = ..., @@ -594,14 +594,14 @@ def to_datetime( @overload def to_datetime( - arg: Union[List, Tuple], + arg: list | tuple, errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ..., - utc: Optional[bool] = ..., - format: Optional[str] = ..., + utc: bool | None = ..., + format: str | None = ..., exact: bool = ..., - unit: Optional[str] = ..., + unit: str | None = ..., infer_datetime_format: bool = ..., origin=..., cache: bool = ..., @@ -614,14 +614,14 @@ def to_datetime( errors: str = "raise", dayfirst: bool = False, yearfirst: bool = False, - utc: Optional[bool] = None, - format: Optional[str] = None, + utc: bool | None = None, + format: str | None = None, exact: bool = True, - unit: Optional[str] = None, + unit: str | None = None, infer_datetime_format: bool = False, origin="unix", cache: bool = True, -) -> Union[DatetimeIndex, Series, DatetimeScalar, NaTType]: +) -> DatetimeIndex | Series | DatetimeScalar | NaTType: """ Convert argument to datetime. diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 6c16ff3edc1d2..7a48db4be10b1 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -48,10 +48,10 @@ def get_center_of_mass( - comass: Optional[float], - span: Optional[float], - halflife: Optional[float], - alpha: Optional[float], + comass: float | None, + span: float | None, + halflife: float | None, + alpha: float | None, ) -> float: valid_count = common.count_not_none(comass, span, halflife, alpha) if valid_count > 1: @@ -226,15 +226,15 @@ class ExponentialMovingWindow(BaseWindow): def __init__( self, obj, - com: Optional[float] = None, - span: Optional[float] = None, - halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None, - alpha: Optional[float] = None, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, min_periods: int = 0, adjust: bool = True, ignore_na: bool = False, axis: int = 0, - times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, + times: str | np.ndarray | FrameOrSeries | None = None, **kwargs, ): self.obj = obj @@ -398,8 +398,8 @@ def var_func(values, begin, end, min_periods): @Appender(_doc_template) def cov( self, - other: Optional[Union[np.ndarray, FrameOrSeries]] = None, - pairwise: Optional[bool] = None, + other: np.ndarray | FrameOrSeries | None = None, + pairwise: bool | None = None, bias: bool = False, **kwargs, ): @@ -453,8 +453,8 @@ def _get_cov(X, Y): @Appender(_doc_template) def corr( self, - other: Optional[Union[np.ndarray, FrameOrSeries]] = None, - pairwise: Optional[bool] = None, + other: np.ndarray | FrameOrSeries | None = None, + pairwise: bool | None = None, **kwargs, ): """ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 439cd586825e1..470b4fb57567f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -80,7 +80,7 @@ class BaseWindow(ShallowMixin, SelectionMixin): """Provides utilities for performing windowing operations.""" - _attributes: List[str] = [ + _attributes: list[str] = [ "window", "min_periods", "center", @@ -90,18 +90,18 @@ class BaseWindow(ShallowMixin, SelectionMixin): 
"closed", "method", ] - exclusions: Set[str] = set() + exclusions: set[str] = set() def __init__( self, obj: FrameOrSeries, window=None, - min_periods: Optional[int] = None, + min_periods: int | None = None, center: bool = False, - win_type: Optional[str] = None, + win_type: str | None = None, axis: Axis = 0, - on: Optional[Union[str, Index]] = None, - closed: Optional[str] = None, + on: str | Index | None = None, + closed: str | None = None, method: str = "single", **kwargs, ): @@ -274,7 +274,7 @@ def __iter__(self): result = obj.iloc[slice(s, e)] yield result - def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: + def _prep_values(self, values: np.ndarray | None = None) -> np.ndarray: """Convert input to numpy arrays for Cython routines""" if values is None: values = extract_array(self._selected_obj, extract_numpy=True) @@ -344,7 +344,7 @@ def _get_window_indexer(self) -> BaseIndexer: return FixedWindowIndexer(window_size=self.window) def _apply_series( - self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None ) -> Series: """ Series version of _apply_blockwise @@ -362,7 +362,7 @@ def _apply_series( return obj._constructor(result, index=obj.index, name=obj.name) def _apply_blockwise( - self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None ) -> FrameOrSeriesUnion: """ Apply the given function to the DataFrame broken down into homogeneous @@ -396,7 +396,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: return out def _apply_tablewise( - self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None ) -> FrameOrSeriesUnion: if self._selected_obj.ndim == 1: raise ValueError("method='table' not applicable for Series objects.") @@ -418,8 +418,8 @@ def _apply_tablewise( def _apply( self, func: Callable[..., Any], - name: Optional[str] = None, - numba_cache_key: Optional[Tuple[Callable, str]] = None, + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, **kwargs, ): """ @@ -767,8 +767,8 @@ def __init__(self, obj, *args, **kwargs): def _apply( self, func: Callable[..., Any], - name: Optional[str] = None, - numba_cache_key: Optional[Tuple[Callable, str]] = None, + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, **kwargs, ) -> FrameOrSeries: result = super()._apply( @@ -1064,8 +1064,8 @@ def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: def _apply( self, func: Callable[[np.ndarray, int, int], np.ndarray], - name: Optional[str] = None, - numba_cache_key: Optional[Tuple[Callable, str]] = None, + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, **kwargs, ): """ @@ -1302,10 +1302,10 @@ def apply( self, func: Callable[..., Any], raw: bool = False, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, - args: Optional[Tuple[Any, ...]] = None, - kwargs: Optional[Dict[str, Any]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, ...] 
| None = None, + kwargs: dict[str, Any] | None = None, ): if args is None: args = () @@ -1344,8 +1344,8 @@ def apply( def _generate_cython_apply_func( self, - args: Tuple[Any, ...], - kwargs: Dict[str, Any], + args: tuple[Any, ...], + kwargs: dict[str, Any], raw: bool, function: Callable[..., Any], ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]: @@ -2287,8 +2287,8 @@ def _get_window_indexer(self) -> GroupbyIndexer: ------- GroupbyIndexer """ - rolling_indexer: Type[BaseIndexer] - indexer_kwargs: Optional[Dict[str, Any]] = None + rolling_indexer: type[BaseIndexer] + indexer_kwargs: dict[str, Any] | None = None index_array = self._index_array window = self.window if isinstance(self.window, BaseIndexer): diff --git a/pandas/io/common.py b/pandas/io/common.py index e5a1f58ec6cd2..4d5d613181f51 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -75,7 +75,7 @@ class IOHandles: handle: Buffer compression: CompressionDict - created_handles: List[Buffer] = dataclasses.field(default_factory=list) + created_handles: list[Buffer] = dataclasses.field(default_factory=list) is_wrapped: bool = False is_mmap: bool = False @@ -322,7 +322,7 @@ def _get_filepath_or_buffer( # If botocore is installed we fallback to reading with anon=True # to allow reads from public buckets - err_types_to_retry_with_anon: List[Any] = [] + err_types_to_retry_with_anon: list[Any] = [] try: import_optional_dependency("botocore") from botocore.exceptions import ClientError, NoCredentialsError @@ -408,7 +408,7 @@ def file_path_to_url(path: str) -> str: def get_compression_method( compression: CompressionOptions, -) -> Tuple[Optional[str], CompressionDict]: +) -> tuple[str | None, CompressionDict]: """ Simplifies a compression argument to a compression method string and a mapping containing additional arguments. @@ -428,7 +428,7 @@ def get_compression_method( ------ ValueError on mapping missing 'method' key """ - compression_method: Optional[str] + compression_method: str | None if isinstance(compression, Mapping): compression_args = dict(compression) try: @@ -442,8 +442,8 @@ def get_compression_method( def infer_compression( - filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] -) -> Optional[str]: + filepath_or_buffer: FilePathOrBuffer, compression: str | None +) -> str | None: """ Get the compression method for filepath_or_buffer. If compression='infer', the inferred compression method is returned. 
Otherwise, the input @@ -503,11 +503,11 @@ def infer_compression( def get_handle( path_or_buf: FilePathOrBuffer, mode: str, - encoding: Optional[str] = None, + encoding: str | None = None, compression: CompressionOptions = None, memory_map: bool = False, is_text: bool = True, - errors: Optional[str] = None, + errors: str | None = None, storage_options: StorageOptions = None, ) -> IOHandles: """ @@ -576,7 +576,7 @@ def get_handle( ) handle = ioargs.filepath_or_buffer - handles: List[Buffer] + handles: list[Buffer] # memory mapping needs to be the first step handle, memory_map, handles = _maybe_memory_map( @@ -719,14 +719,14 @@ def __init__( self, file: FilePathOrBuffer, mode: str, - archive_name: Optional[str] = None, + archive_name: str | None = None, **kwargs, ): mode = mode.replace("b", "") self.archive_name = archive_name - self.multiple_write_buffer: Optional[Union[StringIO, BytesIO]] = None + self.multiple_write_buffer: StringIO | BytesIO | None = None - kwargs_zip: Dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED} + kwargs_zip: dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED} kwargs_zip.update(kwargs) super().__init__(file, mode, **kwargs_zip) # type: ignore[arg-type] @@ -807,10 +807,10 @@ def _maybe_memory_map( memory_map: bool, encoding: str, mode: str, - errors: Optional[str], -) -> Tuple[FileOrBuffer, bool, List[Buffer]]: + errors: str | None, +) -> tuple[FileOrBuffer, bool, list[Buffer]]: """Try to memory map file/buffer.""" - handles: List[Buffer] = [] + handles: list[Buffer] = [] memory_map &= hasattr(handle, "fileno") or isinstance(handle, str) if not memory_map: return handle, memory_map, handles diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 11974d25d72d3..39bfc508aa654 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -315,7 +315,7 @@ def read_excel( index_col=None, usecols=None, squeeze=False, - dtype: Optional[DtypeArg] = None, + dtype: DtypeArg | None = None, engine=None, converters=None, true_values=None, @@ -442,7 +442,7 @@ def parse( index_col=None, usecols=None, squeeze=False, - dtype: Optional[DtypeArg] = None, + dtype: DtypeArg | None = None, true_values=None, false_values=None, skiprows=None, @@ -791,7 +791,7 @@ def save(self): def __init__( self, - path: Union[FilePathOrBuffer, ExcelWriter], + path: FilePathOrBuffer | ExcelWriter, engine=None, date_format=None, datetime_format=None, @@ -817,7 +817,7 @@ def __init__( self.handles = get_handle( path, mode, storage_options=storage_options, is_text=False ) - self.sheets: Dict[str, Any] = {} + self.sheets: dict[str, Any] = {} self.cur_sheet = None if date_format is None: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 71e1bf6b43ad5..9290c9d640a86 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -53,7 +53,7 @@ def save(self): self.book.save(self.handles.handle) @classmethod - def _convert_to_style_kwargs(cls, style_dict: dict) -> Dict[str, Serialisable]: + def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: """ Convert a style_dict to a set of kwargs suitable for initializing or updating-on-copy an openpyxl v2 style object. 
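`IOHandles` above is a dataclass whose fields now use builtin generics (`created_handles: list[Buffer]`); dataclasses accept string annotations, so the spelling works under postponed evaluation. A loose model (hypothetical class and simplified types, not the real IOHandles):

    from __future__ import annotations

    import dataclasses


    @dataclasses.dataclass
    class Handles:
        handle: str
        created_handles: list[str] = dataclasses.field(default_factory=list)
        is_wrapped: bool = False


    h = Handles(handle="out.csv")
    h.created_handles.append("buffer-1")
    print(h)  # Handles(handle='out.csv', created_handles=['buffer-1'], is_wrapped=False)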
@@ -78,7 +78,7 @@ def _convert_to_style_kwargs(cls, style_dict: dict) -> Dict[str, Serialisable]: """ _style_key_map = {"borders": "border"} - style_kwargs: Dict[str, Serialisable] = {} + style_kwargs: dict[str, Serialisable] = {} for k, v in style_dict.items(): if k in _style_key_map: k = _style_key_map[k] @@ -389,7 +389,7 @@ def write_cells( # Write the frame cells using openpyxl. sheet_name = self._get_sheet_name(sheet_name) - _style_cache: Dict[str, Dict[str, Serialisable]] = {} + _style_cache: dict[str, dict[str, Serialisable]] = {} if sheet_name in self.sheets: wks = self.sheets[sheet_name] @@ -411,7 +411,7 @@ def write_cells( if fmt: xcell.number_format = fmt - style_kwargs: Optional[Dict[str, Serialisable]] = {} + style_kwargs: dict[str, Serialisable] | None = {} if cell.style: key = str(cell.style) style_kwargs = _style_cache.get(key) @@ -490,7 +490,7 @@ def close(self): super().close() @property - def sheet_names(self) -> List[str]: + def sheet_names(self) -> list[str]: return self.book.sheetnames def get_sheet_by_name(self, name: str): @@ -522,8 +522,8 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: return cell.value - def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - data: List[List[Scalar]] = [] + def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]: + data: list[list[Scalar]] = [] for row in sheet.rows: data.append([self._convert_cell(cell, convert_float) for cell in row]) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index ca8340cfd0a24..1aca372f4b2eb 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -51,19 +51,19 @@ def __init__( formatter: DataFrameFormatter, path_or_buf: FilePathOrBuffer[str] = "", sep: str = ",", - cols: Optional[Sequence[Hashable]] = None, - index_label: Optional[IndexLabel] = None, + cols: Sequence[Hashable] | None = None, + index_label: IndexLabel | None = None, mode: str = "w", - encoding: Optional[str] = None, + encoding: str | None = None, errors: str = "strict", compression: CompressionOptions = "infer", - quoting: Optional[int] = None, + quoting: int | None = None, line_terminator="\n", - chunksize: Optional[int] = None, - quotechar: Optional[str] = '"', - date_format: Optional[str] = None, + chunksize: int | None = None, + quotechar: str | None = '"', + date_format: str | None = None, doublequote: bool = True, - escapechar: Optional[str] = None, + escapechar: str | None = None, storage_options: StorageOptions = None, ): self.fmt = formatter @@ -93,7 +93,7 @@ def na_rep(self) -> str: return self.fmt.na_rep @property - def float_format(self) -> Optional[FloatFormatType]: + def float_format(self) -> FloatFormatType | None: return self.fmt.float_format @property @@ -101,14 +101,14 @@ def decimal(self) -> str: return self.fmt.decimal @property - def header(self) -> Union[bool, Sequence[str]]: + def header(self) -> bool | Sequence[str]: return self.fmt.header @property def index(self) -> bool: return self.fmt.index - def _initialize_index_label(self, index_label: Optional[IndexLabel]) -> IndexLabel: + def _initialize_index_label(self, index_label: IndexLabel | None) -> IndexLabel: if index_label is not False: if index_label is None: return self._get_index_label_from_obj() @@ -117,20 +117,20 @@ def _initialize_index_label(self, index_label: Optional[IndexLabel]) -> IndexLab return [index_label] return index_label - def _get_index_label_from_obj(self) -> List[str]: + def _get_index_label_from_obj(self) -> list[str]: if 
isinstance(self.obj.index, ABCMultiIndex): return self._get_index_label_multiindex() else: return self._get_index_label_flat() - def _get_index_label_multiindex(self) -> List[str]: + def _get_index_label_multiindex(self) -> list[str]: return [name or "" for name in self.obj.index.names] - def _get_index_label_flat(self) -> List[str]: + def _get_index_label_flat(self) -> list[str]: index_label = self.obj.index.name return [""] if index_label is None else [index_label] - def _initialize_quotechar(self, quotechar: Optional[str]) -> Optional[str]: + def _initialize_quotechar(self, quotechar: str | None) -> str | None: if self.quoting != csvlib.QUOTE_NONE: # prevents crash in _csv return quotechar @@ -141,7 +141,7 @@ def has_mi_columns(self) -> bool: return bool(isinstance(self.obj.columns, ABCMultiIndex)) def _initialize_columns( - self, cols: Optional[Sequence[Hashable]] + self, cols: Sequence[Hashable] | None ) -> Sequence[Hashable]: # validate mi options if self.has_mi_columns: @@ -164,13 +164,13 @@ def _initialize_columns( else: return list(new_cols) - def _initialize_chunksize(self, chunksize: Optional[int]) -> int: + def _initialize_chunksize(self, chunksize: int | None) -> int: if chunksize is None: return (100000 // (len(self.cols) or 1)) or 1 return int(chunksize) @property - def _number_format(self) -> Dict[str, Any]: + def _number_format(self) -> dict[str, Any]: """Dictionary used for storing number formatting settings.""" return { "na_rep": self.na_rep, @@ -221,8 +221,8 @@ def write_cols(self) -> Sequence[Hashable]: return self.cols @property - def encoded_labels(self) -> List[Hashable]: - encoded_labels: List[Hashable] = [] + def encoded_labels(self) -> list[Hashable]: + encoded_labels: list[Hashable] = [] if self.index and self.index_label: assert isinstance(self.index_label, Sequence) @@ -272,7 +272,7 @@ def _save_header(self) -> None: for row in self._generate_multiindex_header_rows(): self.writer.writerow(row) - def _generate_multiindex_header_rows(self) -> Iterator[List[Hashable]]: + def _generate_multiindex_header_rows(self) -> Iterator[list[Hashable]]: columns = self.obj.columns for i in range(columns.nlevels): # we need at least 1 index column to write our col names diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 05d94366e6623..4d5833c16638b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -174,7 +174,7 @@ class CategoricalFormatter: def __init__( self, categorical: Categorical, - buf: Optional[IO[str]] = None, + buf: IO[str] | None = None, length: bool = True, na_rep: str = "NaN", footer: bool = True, @@ -203,7 +203,7 @@ def _get_footer(self) -> str: return str(footer) - def _get_formatted_values(self) -> List[str]: + def _get_formatted_values(self) -> list[str]: return format_array( self.categorical._internal_get_values(), None, @@ -238,16 +238,16 @@ class SeriesFormatter: def __init__( self, series: Series, - buf: Optional[IO[str]] = None, - length: Union[bool, str] = True, + buf: IO[str] | None = None, + length: bool | str = True, header: bool = True, index: bool = True, na_rep: str = "NaN", name: bool = False, - float_format: Optional[str] = None, + float_format: str | None = None, dtype: bool = True, - max_rows: Optional[int] = None, - min_rows: Optional[int] = None, + max_rows: int | None = None, + min_rows: int | None = None, ): self.series = series self.buf = buf if buf is not None else StringIO() @@ -268,7 +268,7 @@ def __init__( self._chk_truncate() def _chk_truncate(self) -> None: - self.tr_row_num: 
Optional[int] + self.tr_row_num: int | None min_rows = self.min_rows max_rows = self.max_rows @@ -335,7 +335,7 @@ def _get_footer(self) -> str: return str(footer) - def _get_formatted_index(self) -> Tuple[List[str], bool]: + def _get_formatted_index(self) -> tuple[list[str], bool]: index = self.tr_series.index if isinstance(index, MultiIndex): @@ -346,7 +346,7 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]: fmt_index = index.format(name=True) return fmt_index, have_header - def _get_formatted_values(self) -> List[str]: + def _get_formatted_values(self) -> list[str]: return format_array( self.tr_series._values, None, @@ -401,7 +401,7 @@ def __init__(self): def len(self, text: str) -> int: return len(text) - def justify(self, texts: Any, max_len: int, mode: str = "right") -> List[str]: + def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]: return justify(texts, max_len, mode=mode) def adjoin(self, space: int, *lists, **kwargs) -> str: @@ -434,7 +434,7 @@ def len(self, text: str) -> int: def justify( self, texts: Iterable[str], max_len: int, mode: str = "right" - ) -> List[str]: + ) -> list[str]: # re-calculate padding space per str considering East Asian Width def _get_pad(t): return max_len - self.len(t) + len(t) @@ -464,20 +464,20 @@ class DataFrameFormatter: def __init__( self, frame: DataFrame, - columns: Optional[Sequence[str]] = None, - col_space: Optional[ColspaceArgType] = None, - header: Union[bool, Sequence[str]] = True, + columns: Sequence[str] | None = None, + col_space: ColspaceArgType | None = None, + header: bool | Sequence[str] = True, index: bool = True, na_rep: str = "NaN", - formatters: Optional[FormattersType] = None, - justify: Optional[str] = None, - float_format: Optional[FloatFormatType] = None, - sparsify: Optional[bool] = None, + formatters: FormattersType | None = None, + justify: str | None = None, + float_format: FloatFormatType | None = None, + sparsify: bool | None = None, index_names: bool = True, - max_rows: Optional[int] = None, - min_rows: Optional[int] = None, - max_cols: Optional[int] = None, - show_dimensions: Union[bool, str] = False, + max_rows: int | None = None, + min_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool | str = False, decimal: str = ".", bold_rows: bool = False, escape: bool = True, @@ -508,7 +508,7 @@ def __init__( self.truncate() self.adj = get_adjustment() - def get_strcols(self) -> List[List[str]]: + def get_strcols(self) -> list[list[str]]: """ Render a DataFrame to a list of columns (as lists of strings). 
""" @@ -562,13 +562,13 @@ def show_col_idx_names(self) -> bool: def max_rows_displayed(self) -> int: return min(self.max_rows or len(self.frame), len(self.frame)) - def _initialize_sparsify(self, sparsify: Optional[bool]) -> bool: + def _initialize_sparsify(self, sparsify: bool | None) -> bool: if sparsify is None: return get_option("display.multi_sparse") return sparsify def _initialize_formatters( - self, formatters: Optional[FormattersType] + self, formatters: FormattersType | None ) -> FormattersType: if formatters is None: return {} @@ -580,13 +580,13 @@ def _initialize_formatters( f"DataFrame number of columns({len(self.frame.columns)})" ) - def _initialize_justify(self, justify: Optional[str]) -> str: + def _initialize_justify(self, justify: str | None) -> str: if justify is None: return get_option("display.colheader_justify") else: return justify - def _initialize_columns(self, columns: Optional[Sequence[str]]) -> Index: + def _initialize_columns(self, columns: Sequence[str] | None) -> Index: if columns is not None: cols = ensure_index(columns) self.frame = self.frame[cols] @@ -595,7 +595,7 @@ def _initialize_columns(self, columns: Optional[Sequence[str]]) -> Index: return self.frame.columns def _initialize_colspace( - self, col_space: Optional[ColspaceArgType] + self, col_space: ColspaceArgType | None ) -> ColspaceType: result: ColspaceType @@ -620,7 +620,7 @@ def _initialize_colspace( result = dict(zip(self.frame.columns, col_space)) return result - def _calc_max_cols_fitted(self) -> Optional[int]: + def _calc_max_cols_fitted(self) -> int | None: """Number of columns fitting the screen.""" if not self._is_in_terminal(): return self.max_cols @@ -631,9 +631,9 @@ def _calc_max_cols_fitted(self) -> Optional[int]: else: return self.max_cols - def _calc_max_rows_fitted(self) -> Optional[int]: + def _calc_max_rows_fitted(self) -> int | None: """Number of rows with data fitting the screen.""" - max_rows: Optional[int] + max_rows: int | None if self._is_in_terminal(): _, height = get_terminal_size() @@ -650,7 +650,7 @@ def _calc_max_rows_fitted(self) -> Optional[int]: return self._adjust_max_rows(max_rows) - def _adjust_max_rows(self, max_rows: Optional[int]) -> Optional[int]: + def _adjust_max_rows(self, max_rows: int | None) -> int | None: """Adjust max_rows using display logic. 
See description here: @@ -742,8 +742,8 @@ def _truncate_vertically(self) -> None: self.tr_frame = self.tr_frame.iloc[:row_num, :] self.tr_row_num = row_num - def _get_strcols_without_index(self) -> List[List[str]]: - strcols: List[List[str]] = [] + def _get_strcols_without_index(self) -> list[list[str]]: + strcols: list[list[str]] = [] if not is_list_like(self.header) and not self.header: for i, c in enumerate(self.tr_frame): @@ -789,7 +789,7 @@ def _get_strcols_without_index(self) -> List[List[str]]: return strcols - def format_col(self, i: int) -> List[str]: + def format_col(self, i: int) -> list[str]: frame = self.tr_frame formatter = self._get_formatter(i) return format_array( @@ -802,7 +802,7 @@ def format_col(self, i: int) -> List[str]: leading_space=self.index, ) - def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]: + def _get_formatter(self, i: str | int) -> Callable | None: if isinstance(self.formatters, (list, tuple)): if is_integer(i): i = cast(int, i) @@ -814,7 +814,7 @@ def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]: i = self.columns[i] return self.formatters.get(i, None) - def _get_formatted_column_labels(self, frame: DataFrame) -> List[List[str]]: + def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]: from pandas.core.indexes.multi import sparsify_labels columns = frame.columns @@ -855,7 +855,7 @@ def space_format(x, y): # self.str_columns = str_columns return str_columns - def _get_formatted_index(self, frame: DataFrame) -> List[str]: + def _get_formatted_index(self, frame: DataFrame) -> list[str]: # Note: this is only used by to_string() and to_latex(), not by # to_html(). so safe to cast col_space here. col_space = {k: cast(int, v) for k, v in self.col_space.items()} @@ -895,8 +895,8 @@ def _get_formatted_index(self, frame: DataFrame) -> List[str]: else: return adjoined - def _get_column_name_list(self) -> List[str]: - names: List[str] = [] + def _get_column_name_list(self) -> list[str]: + names: list[str] = [] columns = self.frame.columns if isinstance(columns, MultiIndex): names.extend("" if name is None else name for name in columns.names) @@ -927,17 +927,17 @@ def __init__(self, fmt: DataFrameFormatter): def to_latex( self, - buf: Optional[FilePathOrBuffer[str]] = None, - column_format: Optional[str] = None, + buf: FilePathOrBuffer[str] | None = None, + column_format: str | None = None, longtable: bool = False, - encoding: Optional[str] = None, + encoding: str | None = None, multicolumn: bool = False, - multicolumn_format: Optional[str] = None, + multicolumn_format: str | None = None, multirow: bool = False, - caption: Optional[str] = None, - label: Optional[str] = None, - position: Optional[str] = None, - ) -> Optional[str]: + caption: str | None = None, + label: str | None = None, + position: str | None = None, + ) -> str | None: """ Render a DataFrame to a LaTeX tabular/longtable environment output. """ @@ -959,14 +959,14 @@ def to_latex( def to_html( self, - buf: Optional[FilePathOrBuffer[str]] = None, - encoding: Optional[str] = None, - classes: Optional[Union[str, List, Tuple]] = None, + buf: FilePathOrBuffer[str] | None = None, + encoding: str | None = None, + classes: str | list | tuple | None = None, notebook: bool = False, - border: Optional[int] = None, - table_id: Optional[str] = None, + border: int | None = None, + table_id: str | None = None, render_links: bool = False, - ) -> Optional[str]: + ) -> str | None: """ Render a DataFrame to a html table. 
@@ -1005,10 +1005,10 @@ def to_html( def to_string( self, - buf: Optional[FilePathOrBuffer[str]] = None, - encoding: Optional[str] = None, - line_width: Optional[int] = None, - ) -> Optional[str]: + buf: FilePathOrBuffer[str] | None = None, + encoding: str | None = None, + line_width: int | None = None, + ) -> str | None: """ Render a DataFrame to a console-friendly tabular output. @@ -1029,23 +1029,23 @@ def to_string( def to_csv( self, - path_or_buf: Optional[FilePathOrBuffer[str]] = None, - encoding: Optional[str] = None, + path_or_buf: FilePathOrBuffer[str] | None = None, + encoding: str | None = None, sep: str = ",", - columns: Optional[Sequence[Hashable]] = None, - index_label: Optional[IndexLabel] = None, + columns: Sequence[Hashable] | None = None, + index_label: IndexLabel | None = None, mode: str = "w", compression: CompressionOptions = "infer", - quoting: Optional[int] = None, + quoting: int | None = None, quotechar: str = '"', - line_terminator: Optional[str] = None, - chunksize: Optional[int] = None, - date_format: Optional[str] = None, + line_terminator: str | None = None, + chunksize: int | None = None, + date_format: str | None = None, doublequote: bool = True, - escapechar: Optional[str] = None, + escapechar: str | None = None, errors: str = "strict", storage_options: StorageOptions = None, - ) -> Optional[str]: + ) -> str | None: """ Render dataframe as comma-separated file. """ @@ -1089,9 +1089,9 @@ def to_csv( def save_to_buffer( string: str, - buf: Optional[FilePathOrBuffer[str]] = None, - encoding: Optional[str] = None, -) -> Optional[str]: + buf: FilePathOrBuffer[str] | None = None, + encoding: str | None = None, +) -> str | None: """ Perform serialization. Write to buf or return as string if buf is None. """ @@ -1103,7 +1103,7 @@ def save_to_buffer( @contextmanager -def get_buffer(buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None): +def get_buffer(buf: FilePathOrBuffer[str] | None, encoding: str | None = None): """ Context manager to open, yield and close buffer for filenames or Path-like objects, otherwise yield buf unchanged. @@ -1137,16 +1137,16 @@ def get_buffer(buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = N def format_array( values: Any, - formatter: Optional[Callable], - float_format: Optional[FloatFormatType] = None, + formatter: Callable | None, + float_format: FloatFormatType | None = None, na_rep: str = "NaN", - digits: Optional[int] = None, - space: Optional[Union[str, int]] = None, + digits: int | None = None, + space: str | int | None = None, justify: str = "right", decimal: str = ".", - leading_space: Optional[bool] = True, - quoting: Optional[int] = None, -) -> List[str]: + leading_space: bool | None = True, + quoting: int | None = None, +) -> list[str]: """ Format an array for printing. 
@@ -1173,7 +1173,7 @@ def format_array( ------- List[str] """ - fmt_klass: Type[GenericArrayFormatter] + fmt_klass: type[GenericArrayFormatter] if is_datetime64_dtype(values.dtype): fmt_klass = Datetime64Formatter elif is_datetime64tz_dtype(values.dtype): @@ -1219,15 +1219,15 @@ def __init__( self, values: Any, digits: int = 7, - formatter: Optional[Callable] = None, + formatter: Callable | None = None, na_rep: str = "NaN", - space: Union[str, int] = 12, - float_format: Optional[FloatFormatType] = None, + space: str | int = 12, + float_format: FloatFormatType | None = None, justify: str = "right", decimal: str = ".", - quoting: Optional[int] = None, + quoting: int | None = None, fixed_width: bool = True, - leading_space: Optional[bool] = True, + leading_space: bool | None = True, ): self.values = values self.digits = digits @@ -1241,11 +1241,11 @@ def __init__( self.fixed_width = fixed_width self.leading_space = leading_space - def get_result(self) -> List[str]: + def get_result(self) -> list[str]: fmt_values = self._format_strings() return _make_fixed_width(fmt_values, self.justify) - def _format_strings(self) -> List[str]: + def _format_strings(self) -> list[str]: if self.float_format is None: float_format = get_option("display.float_format") if float_format is None: @@ -1329,8 +1329,8 @@ def __init__(self, *args, **kwargs): def _value_formatter( self, - float_format: Optional[FloatFormatType] = None, - threshold: Optional[Union[float, int]] = None, + float_format: FloatFormatType | None = None, + threshold: float | int | None = None, ) -> Callable: """Returns a function to be applied on each value to format it""" # the float_format parameter supersedes self.float_format @@ -1436,7 +1436,7 @@ def format_values_with(float_format): # There is a special default string when we are fixed-width # The default is otherwise to use str instead of a formatting string - float_format: Optional[FloatFormatType] + float_format: FloatFormatType | None if self.float_format is None: if self.fixed_width: if self.leading_space is True: @@ -1484,12 +1484,12 @@ def format_values_with(float_format): return formatted_values - def _format_strings(self) -> List[str]: + def _format_strings(self) -> list[str]: return list(self.get_result_as_array()) class IntArrayFormatter(GenericArrayFormatter): - def _format_strings(self) -> List[str]: + def _format_strings(self) -> list[str]: if self.leading_space is False: formatter_str = lambda x: f"{x:d}".format(x=x) else: @@ -1502,7 +1502,7 @@ def _format_strings(self) -> List[str]: class Datetime64Formatter(GenericArrayFormatter): def __init__( self, - values: Union[np.ndarray, Series, DatetimeIndex, DatetimeArray], + values: np.ndarray | Series | DatetimeIndex | DatetimeArray, nat_rep: str = "NaT", date_format: None = None, **kwargs, @@ -1511,7 +1511,7 @@ def __init__( self.nat_rep = nat_rep self.date_format = date_format - def _format_strings(self) -> List[str]: + def _format_strings(self) -> list[str]: """ we by definition DO NOT have a TZ """ values = self.values @@ -1528,7 +1528,7 @@ def _format_strings(self) -> List[str]: class ExtensionArrayFormatter(GenericArrayFormatter): - def _format_strings(self) -> List[str]: + def _format_strings(self) -> list[str]: values = extract_array(self.values, extract_numpy=True) formatter = self.formatter @@ -1557,10 +1557,10 @@ def _format_strings(self) -> List[str]: def format_percentiles( - percentiles: Union[ - np.ndarray, List[Union[int, float]], List[float], List[Union[str, float]] - ] -) -> List[str]: + percentiles: (
np.ndarray | list[int | float] | list[float] | list[str | float] + ) +) -> list[str]: """ Outputs rounded and formatted percentiles. @@ -1626,7 +1626,7 @@ def format_percentiles( def is_dates_only( - values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex] + values: np.ndarray | DatetimeArray | Index | DatetimeIndex ) -> bool: # return a boolean if we are only dates (and don't have a timezone) if not isinstance(values, Index): @@ -1647,7 +1647,7 @@ def is_dates_only( return False -def _format_datetime64(x: Union[NaTType, Timestamp], nat_rep: str = "NaT") -> str: +def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str: if x is NaT: return nat_rep @@ -1655,9 +1655,9 @@ def _format_datetime64(x: Union[NaTType, Timestamp], nat_rep: str = "NaT") -> st def _format_datetime64_dateonly( - x: Union[NaTType, Timestamp], + x: NaTType | Timestamp, nat_rep: str = "NaT", - date_format: Optional[str] = None, + date_format: str | None = None, ) -> str: if x is NaT: return nat_rep @@ -1669,7 +1669,7 @@ def _format_datetime64_dateonly( def get_format_datetime64( - is_dates_only: bool, nat_rep: str = "NaT", date_format: Optional[str] = None + is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None ) -> Callable: if is_dates_only: @@ -1681,8 +1681,8 @@ def get_format_datetime64( def get_format_datetime64_from_values( - values: Union[np.ndarray, DatetimeArray, DatetimeIndex], date_format: Optional[str] -) -> Optional[str]: + values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None +) -> str | None: """ given values and a date_format, return a string format """ if isinstance(values, np.ndarray) and values.ndim > 1: # We don't actually care about the order of values, and DatetimeIndex @@ -1696,7 +1696,7 @@ def get_format_datetime64_from_values( class Datetime64TZFormatter(Datetime64Formatter): - def _format_strings(self) -> List[str]: + def _format_strings(self) -> list[str]: """ we by definition have a TZ """ values = self.values.astype(object) ido = is_dates_only(values) @@ -1711,7 +1711,7 @@ def _format_strings(self) -> List[str]: class Timedelta64Formatter(GenericArrayFormatter): def __init__( self, - values: Union[np.ndarray, TimedeltaIndex], + values: np.ndarray | TimedeltaIndex, nat_rep: str = "NaT", box: bool = False, **kwargs, @@ -1720,7 +1720,7 @@ def __init__( self.nat_rep = nat_rep self.box = box - def _format_strings(self) -> List[str]: + def _format_strings(self) -> list[str]: formatter = self.formatter or get_format_timedelta64( self.values, nat_rep=self.nat_rep, box=self.box ) @@ -1728,7 +1728,7 @@ def _format_strings(self) -> List[str]: def get_format_timedelta64( - values: Union[np.ndarray, TimedeltaIndex, TimedeltaArray], + values: np.ndarray | TimedeltaIndex | TimedeltaArray, nat_rep: str = "NaT", box: bool = False, ) -> Callable: @@ -1767,11 +1767,11 @@ def _formatter(x): def _make_fixed_width( - strings: List[str], + strings: list[str], justify: str = "right", - minimum: Optional[int] = None, - adj: Optional[TextAdjustment] = None, -) -> List[str]: + minimum: int | None = None, + adj: TextAdjustment | None = None, +) -> list[str]: if len(strings) == 0 or justify == "all": return strings @@ -1801,7 +1801,7 @@ def just(x: str) -> str: return result -def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> List[str]: +def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]: """ Separates the real and imaginary parts from the complex number, and executes the _trim_zeros_float method on 
each of those. @@ -1839,8 +1839,8 @@ def _trim_zeros_single_float(str_float: str) -> str: def _trim_zeros_float( - str_floats: Union[np.ndarray, List[str]], decimal: str = "." -) -> List[str]: + str_floats: np.ndarray | list[str], decimal: str = "." +) -> list[str]: """ Trims the maximum number of trailing zeros equally from all numbers containing decimals, leaving just one if @@ -1852,7 +1852,7 @@ def _trim_zeros_float( def is_number_with_decimal(x): return re.match(number_regex, x) is not None - def should_trim(values: Union[np.ndarray, List[str]]) -> bool: + def should_trim(values: np.ndarray | list[str]) -> bool: """ Determine if an array of strings should be trimmed. @@ -1909,11 +1909,11 @@ class EngFormatter: 24: "Y", } - def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False): + def __init__(self, accuracy: int | None = None, use_eng_prefix: bool = False): self.accuracy = accuracy self.use_eng_prefix = use_eng_prefix - def __call__(self, num: Union[int, float]) -> str: + def __call__(self, num: int | float) -> str: """ Formats a number in engineering notation, appending a letter representing the power of 1000 of the original number. Some examples: @@ -1991,8 +1991,8 @@ def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> Non def get_level_lengths( - levels: Any, sentinel: Union[bool, object, str] = "" -) -> List[Dict[int, int]]: + levels: Any, sentinel: bool | object | str = "" +) -> list[dict[int, int]]: """ For each index in each level the function returns lengths of indexes. @@ -2033,7 +2033,7 @@ def get_level_lengths( return result -def buffer_put_lines(buf: IO[str], lines: List[str]) -> None: +def buffer_put_lines(buf: IO[str], lines: list[str]) -> None: """ Appends lines to a buffer. diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index b1675fa5c5375..3031280ed2f2b 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -27,7 +27,7 @@ from pandas.core.frame import DataFrame -def _put_str(s: Union[str, Dtype], space: int) -> str: +def _put_str(s: str | Dtype, space: int) -> str: """ Make string of specified length, padding to the right if necessary. @@ -53,7 +53,7 @@ def _put_str(s: Union[str, Dtype], space: int) -> str: return str(s)[:space].ljust(space) -def _sizeof_fmt(num: Union[int, float], size_qualifier: str) -> str: +def _sizeof_fmt(num: int | float, size_qualifier: str) -> str: """ Return size in human readable format. @@ -85,8 +85,8 @@ def _sizeof_fmt(num: Union[int, float], size_qualifier: str) -> str: def _initialize_memory_usage( - memory_usage: Optional[Union[bool, str]] = None, -) -> Union[bool, str]: + memory_usage: bool | str | None = None, +) -> bool | str: """Get memory usage based on inputs and display options.""" if memory_usage is None: memory_usage = get_option("display.memory_usage") @@ -108,7 +108,7 @@ class BaseInfo(ABC): """ data: FrameOrSeriesUnion - memory_usage: Union[bool, str] + memory_usage: bool | str @property @abstractmethod @@ -168,10 +168,10 @@ def size_qualifier(self) -> str: def render( self, *, - buf: Optional[IO[str]], - max_cols: Optional[int], - verbose: Optional[bool], - show_counts: Optional[bool], + buf: IO[str] | None, + max_cols: int | None, + verbose: bool | None, + show_counts: bool | None, ) -> None: """ Print a concise summary of a %(klass)s. 
@@ -230,7 +230,7 @@ class DataFrameInfo(BaseInfo): def __init__( self, data: DataFrame, - memory_usage: Optional[Union[bool, str]] = None, + memory_usage: bool | str | None = None, ): self.data: DataFrame = data self.memory_usage = _initialize_memory_usage(memory_usage) @@ -284,10 +284,10 @@ def memory_usage_bytes(self) -> int: def render( self, *, - buf: Optional[IO[str]], - max_cols: Optional[int], - verbose: Optional[bool], - show_counts: Optional[bool], + buf: IO[str] | None, + max_cols: int | None, + verbose: bool | None, + show_counts: bool | None, ) -> None: printer = DataFrameInfoPrinter( info=self, @@ -303,7 +303,7 @@ class InfoPrinterAbstract: Class for printing dataframe or series info. """ - def to_buffer(self, buf: Optional[IO[str]] = None) -> None: + def to_buffer(self, buf: IO[str] | None = None) -> None: """Save dataframe info into buffer.""" table_builder = self._create_table_builder() lines = table_builder.get_lines() @@ -335,9 +335,9 @@ class DataFrameInfoPrinter(InfoPrinterAbstract): def __init__( self, info: DataFrameInfo, - max_cols: Optional[int] = None, - verbose: Optional[bool] = None, - show_counts: Optional[bool] = None, + max_cols: int | None = None, + verbose: bool | None = None, + show_counts: bool | None = None, ): self.info = info self.data = info.data @@ -365,12 +365,12 @@ def col_count(self) -> int: """Number of columns to be summarized.""" return self.info.col_count - def _initialize_max_cols(self, max_cols: Optional[int]) -> int: + def _initialize_max_cols(self, max_cols: int | None) -> int: if max_cols is None: return get_option("display.max_info_columns", self.col_count + 1) return max_cols - def _initialize_show_counts(self, show_counts: Optional[bool]) -> bool: + def _initialize_show_counts(self, show_counts: bool | None) -> bool: if show_counts is None: return bool(not self.exceeds_info_cols and not self.exceeds_info_rows) else: @@ -402,11 +402,11 @@ class TableBuilderAbstract(ABC): Abstract builder for info table. 
""" - _lines: List[str] + _lines: list[str] info: BaseInfo @abstractmethod - def get_lines(self) -> List[str]: + def get_lines(self) -> list[str]: """Product in a form of list of lines (strings).""" @property @@ -466,7 +466,7 @@ class DataFrameTableBuilder(TableBuilderAbstract): def __init__(self, *, info: DataFrameInfo): self.info: DataFrameInfo = info - def get_lines(self) -> List[str]: + def get_lines(self) -> list[str]: self._lines = [] if self.col_count == 0: self._fill_empty_info() diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 0cb9aa3bea6ab..0a6969437bb9c 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -146,17 +146,17 @@ class Styler: def __init__( self, data: FrameOrSeriesUnion, - precision: Optional[int] = None, - table_styles: Optional[List[Dict[str, List[Tuple[str, str]]]]] = None, - uuid: Optional[str] = None, - caption: Optional[str] = None, - table_attributes: Optional[str] = None, + precision: int | None = None, + table_styles: list[dict[str, list[tuple[str, str]]]] | None = None, + uuid: str | None = None, + caption: str | None = None, + table_attributes: str | None = None, cell_ids: bool = True, - na_rep: Optional[str] = None, + na_rep: str | None = None, uuid_len: int = 5, ): - self.ctx: DefaultDict[Tuple[int, int], List[str]] = defaultdict(list) - self._todo: List[Tuple[Callable, Tuple, Dict]] = [] + self.ctx: DefaultDict[tuple[int, int], list[str]] = defaultdict(list) + self._todo: list[tuple[Callable, tuple, dict]] = [] if not isinstance(data, (pd.Series, pd.DataFrame)): raise TypeError("``data`` must be a Series or DataFrame") @@ -184,9 +184,9 @@ def __init__( self.cell_ids = cell_ids self.na_rep = na_rep - self.tooltips: Optional[_Tooltips] = None + self.tooltips: _Tooltips | None = None - self.cell_context: Dict[str, Any] = {} + self.cell_context: dict[str, Any] = {} # display_funcs maps (row, col) -> formatting function @@ -200,7 +200,7 @@ def default_display_func(x): return x self._display_funcs: DefaultDict[ - Tuple[int, int], Callable[[Any], str] + tuple[int, int], Callable[[Any], str] ] = defaultdict(lambda: default_display_func) def _repr_html_(self) -> str: @@ -266,8 +266,8 @@ def set_tooltips(self, ttips: DataFrame) -> Styler: def set_tooltips_class( self, - name: Optional[str] = None, - properties: Optional[Sequence[Tuple[str, Union[str, int, float]]]] = None, + name: str | None = None, + properties: Sequence[tuple[str, str | int | float]] | None = None, ) -> Styler: """ Manually configure the name and/or properties of the class for @@ -330,19 +330,19 @@ def to_excel( excel_writer, sheet_name: str = "Sheet1", na_rep: str = "", - float_format: Optional[str] = None, - columns: Optional[Sequence[Hashable]] = None, - header: Union[Sequence[Hashable], bool] = True, + float_format: str | None = None, + columns: Sequence[Hashable] | None = None, + header: Sequence[Hashable] | bool = True, index: bool = True, - index_label: Optional[IndexLabel] = None, + index_label: IndexLabel | None = None, startrow: int = 0, startcol: int = 0, - engine: Optional[str] = None, + engine: str | None = None, merge_cells: bool = True, - encoding: Optional[str] = None, + encoding: str | None = None, inf_rep: str = "inf", verbose: bool = True, - freeze_panes: Optional[Tuple[int, int]] = None, + freeze_panes: tuple[int, int] | None = None, ) -> None: from pandas.io.formats.excel import ExcelFormatter @@ -568,7 +568,7 @@ def format_attr(pair): return d - def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> 
Styler: + def format(self, formatter, subset=None, na_rep: str | None = None) -> Styler: """ Format the text display value of cells. @@ -836,7 +836,7 @@ def _compute(self): def _apply( self, func: Callable[..., Styler], - axis: Optional[Axis] = 0, + axis: Axis | None = 0, subset=None, **kwargs, ) -> Styler: @@ -875,7 +875,7 @@ def _apply( def apply( self, func: Callable[..., Styler], - axis: Optional[Axis] = 0, + axis: Axis | None = 0, subset=None, **kwargs, ) -> Styler: @@ -972,7 +972,7 @@ def where( self, cond: Callable, value: str, - other: Optional[str] = None, + other: str | None = None, subset=None, **kwargs, ) -> Styler: @@ -1050,7 +1050,7 @@ def set_table_attributes(self, attributes: str) -> Styler: self.table_attributes = attributes return self - def export(self) -> List[Tuple[Callable, Tuple, Dict]]: + def export(self) -> list[tuple[Callable, tuple, dict]]: """ Export the styles to applied to the current Styler. @@ -1066,7 +1066,7 @@ def export(self) -> List[Tuple[Callable, Tuple, Dict]]: """ return self._todo - def use(self, styles: List[Tuple[Callable, Tuple, Dict]]) -> Styler: + def use(self, styles: list[tuple[Callable, tuple, dict]]) -> Styler: """ Set the styles on the current Styler. @@ -1267,7 +1267,7 @@ def _highlight_null(v, null_color: str) -> str: def highlight_null( self, null_color: str = "red", - subset: Optional[IndexLabel] = None, + subset: IndexLabel | None = None, ) -> Styler: """ Shade the background ``null_color`` for missing values. @@ -1292,11 +1292,11 @@ def background_gradient( cmap="PuBu", low: float = 0, high: float = 0, - axis: Optional[Axis] = 0, + axis: Axis | None = 0, subset=None, text_color_threshold: float = 0.408, - vmin: Optional[float] = None, - vmax: Optional[float] = None, + vmin: float | None = None, + vmax: float | None = None, ) -> Styler: """ Color the background in a gradient style. @@ -1375,8 +1375,8 @@ def _background_gradient( low: float = 0, high: float = 0, text_color_threshold: float = 0.408, - vmin: Optional[float] = None, - vmax: Optional[float] = None, + vmin: float | None = None, + vmax: float | None = None, ): """ Color background in a range according to the data. @@ -1462,10 +1462,10 @@ def set_properties(self, subset=None, **kwargs) -> Styler: def _bar( s, align: str, - colors: List[str], + colors: list[str], width: float = 100, - vmin: Optional[float] = None, - vmax: Optional[float] = None, + vmin: float | None = None, + vmax: float | None = None, ): """ Draw bar chart in dataframe cells. @@ -1521,12 +1521,12 @@ def css(x): def bar( self, subset=None, - axis: Optional[Axis] = 0, + axis: Axis | None = 0, color="#d65f5f", width: float = 100, align: str = "left", - vmin: Optional[float] = None, - vmax: Optional[float] = None, + vmin: float | None = None, + vmax: float | None = None, ) -> Styler: """ Draw bar chart in the cell backgrounds. @@ -1603,7 +1603,7 @@ def bar( return self def highlight_max( - self, subset=None, color: str = "yellow", axis: Optional[Axis] = 0 + self, subset=None, color: str = "yellow", axis: Axis | None = 0 ) -> Styler: """ Highlight the maximum by shading the background. @@ -1625,7 +1625,7 @@ def highlight_max( return self._highlight_handler(subset=subset, color=color, axis=axis, max_=True) def highlight_min( - self, subset=None, color: str = "yellow", axis: Optional[Axis] = 0 + self, subset=None, color: str = "yellow", axis: Axis | None = 0 ) -> Styler: """ Highlight the minimum by shading the background. 
@@ -1652,7 +1652,7 @@ def _highlight_handler( self, subset=None, color: str = "yellow", - axis: Optional[Axis] = None, + axis: Axis | None = None, max_: bool = True, ) -> Styler: subset = non_reducing_slice(maybe_numeric_slice(self.data, subset)) @@ -1816,7 +1816,7 @@ class _Tooltips: def __init__( self, - css_props: Sequence[Tuple[str, Union[str, int, float]]] = [ + css_props: Sequence[tuple[str, str | int | float]] = [ ("visibility", "hidden"), ("position", "absolute"), ("z-index", 1), @@ -1830,7 +1830,7 @@ def __init__( self.class_name = css_name self.class_properties = css_props self.tt_data = tooltips - self.table_styles: List[Dict[str, Union[str, List[Tuple[str, str]]]]] = [] + self.table_styles: list[dict[str, str | list[tuple[str, str]]]] = [] @property def _class_styles(self): @@ -1899,7 +1899,7 @@ def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str): }, ] - def _translate(self, styler_data: FrameOrSeriesUnion, uuid: str, d: Dict): + def _translate(self, styler_data: FrameOrSeriesUnion, uuid: str, d: dict): """ Mutate the render dictionary to allow for tooltips: @@ -2008,7 +2008,7 @@ def _get_level_lengths(index, hidden_elements=None): def _maybe_wrap_formatter( - formatter: Union[Callable, str], na_rep: Optional[str] + formatter: Callable | str, na_rep: str | None ) -> Callable: if isinstance(formatter, str): formatter_func = lambda x: formatter.format(x) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 260d688ccb0cc..f111b4dbf8168 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -22,18 +22,18 @@ def _try_import(): def read_gbq( query: str, - project_id: Optional[str] = None, - index_col: Optional[str] = None, - col_order: Optional[List[str]] = None, + project_id: str | None = None, + index_col: str | None = None, + col_order: list[str] | None = None, reauth: bool = False, auth_local_webserver: bool = False, - dialect: Optional[str] = None, - location: Optional[str] = None, - configuration: Optional[Dict[str, Any]] = None, + dialect: str | None = None, + location: str | None = None, + configuration: dict[str, Any] | None = None, credentials=None, - use_bqstorage_api: Optional[bool] = None, - max_results: Optional[int] = None, - progress_bar_type: Optional[str] = None, + use_bqstorage_api: bool | None = None, + max_results: int | None = None, + progress_bar_type: str | None = None, ) -> DataFrame: """ Load data from Google BigQuery. @@ -170,7 +170,7 @@ def read_gbq( """ pandas_gbq = _try_import() - kwargs: Dict[str, Union[str, bool, int, None]] = {} + kwargs: dict[str, str | bool | int | None] = {} # START: new kwargs. Don't populate unless explicitly set. 
if use_bqstorage_api is not None: @@ -199,13 +199,13 @@ def read_gbq( def to_gbq( dataframe: DataFrame, destination_table: str, - project_id: Optional[str] = None, - chunksize: Optional[int] = None, + project_id: str | None = None, + chunksize: int | None = None, reauth: bool = False, if_exists: str = "fail", auth_local_webserver: bool = False, - table_schema: Optional[List[Dict[str, str]]] = None, - location: Optional[str] = None, + table_schema: list[dict[str, str]] | None = None, + location: str | None = None, progress_bar: bool = True, credentials=None, ) -> None: diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 8dcc9fa490635..7b6e45111748e 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -34,7 +34,7 @@ def nested_to_record( prefix: str = "", sep: str = ".", level: int = 0, - max_level: Optional[int] = None, + max_level: int | None = None, ): """ A simplified json_normalize @@ -111,14 +111,14 @@ def nested_to_record( def _json_normalize( - data: Union[Dict, List[Dict]], - record_path: Optional[Union[str, List]] = None, - meta: Optional[Union[str, List[Union[str, List[str]]]]] = None, - meta_prefix: Optional[str] = None, - record_prefix: Optional[str] = None, + data: dict | list[dict], + record_path: str | list | None = None, + meta: str | list[str | list[str]] | None = None, + meta_prefix: str | None = None, + record_prefix: str | None = None, errors: str = "raise", sep: str = ".", - max_level: Optional[int] = None, + max_level: int | None = None, ) -> DataFrame: """ Normalize semi-structured JSON data into a flat table. @@ -229,8 +229,8 @@ def _json_normalize( """ def _pull_field( - js: Dict[str, Any], spec: Union[List, str] - ) -> Union[Scalar, Iterable]: + js: dict[str, Any], spec: list | str + ) -> Scalar | Iterable: """Internal function to pull field""" result = js if isinstance(spec, list): @@ -240,7 +240,7 @@ def _pull_field( result = result[spec] return result - def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> List: + def _pull_records(js: dict[str, Any], spec: list | str) -> list: """ Internal function to pull field for records, and similar to _pull_field, but require to return list. And will raise error @@ -294,7 +294,7 @@ def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> List: _meta = [m if isinstance(m, list) else [m] for m in meta] # Disastrously inefficient for now - records: List = [] + records: list = [] lengths = [] meta_vals: DefaultDict = defaultdict(list) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index a219be99540dc..18998884a2cca 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -13,7 +13,7 @@ def read_orc( - path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs + path: FilePathOrBuffer, columns: list[str] | None = None, **kwargs ) -> DataFrame: """ Load an ORC object from the file path, returning a DataFrame. 
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 0a322059ed77c..eb4e570e56e3c 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -64,7 +64,7 @@ def _get_path_or_handle( storage_options: StorageOptions = None, mode: str = "rb", is_dir: bool = False, -) -> Tuple[FilePathOrBuffer, Optional[IOHandles], Any]: +) -> tuple[FilePathOrBuffer, IOHandles | None, Any]: """File handling for PyArrow.""" path_or_handle = stringify_path(path) if is_fsspec_url(path_or_handle) and fs is None: @@ -148,15 +148,15 @@ def write( self, df: DataFrame, path: FilePathOrBuffer[AnyStr], - compression: Optional[str] = "snappy", - index: Optional[bool] = None, + compression: str | None = "snappy", + index: bool | None = None, storage_options: StorageOptions = None, - partition_cols: Optional[List[str]] = None, + partition_cols: list[str] | None = None, **kwargs, ): self.validate_dataframe(df) - from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)} + from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)} if index is not None: from_pandas_kwargs["preserve_index"] = index @@ -334,14 +334,14 @@ def read( @doc(storage_options=generic._shared_docs["storage_options"]) def to_parquet( df: DataFrame, - path: Optional[FilePathOrBuffer] = None, + path: FilePathOrBuffer | None = None, engine: str = "auto", - compression: Optional[str] = "snappy", - index: Optional[bool] = None, + compression: str | None = "snappy", + index: bool | None = None, storage_options: StorageOptions = None, - partition_cols: Optional[List[str]] = None, + partition_cols: list[str] | None = None, **kwargs, -) -> Optional[bytes]: +) -> bytes | None: """ Write a DataFrame to the parquet format. diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8917be1f558b2..0354d1a35afd4 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -239,15 +239,15 @@ def to_hdf( key: str, value: FrameOrSeries, mode: str = "a", - complevel: Optional[int] = None, - complib: Optional[str] = None, + complevel: int | None = None, + complib: str | None = None, append: bool = False, - format: Optional[str] = None, + format: str | None = None, index: bool = True, - min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + min_itemsize: int | dict[str, int] | None = None, nan_rep=None, - dropna: Optional[bool] = None, - data_columns: Optional[Union[bool, List[str]]] = None, + dropna: bool | None = None, + data_columns: bool | list[str] | None = None, errors: str = "strict", encoding: str = "UTF-8", ): @@ -296,11 +296,11 @@ def read_hdf( mode: str = "r", errors: str = "strict", where=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, columns=None, iterator=False, - chunksize: Optional[int] = None, + chunksize: int | None = None, **kwargs, ): """ @@ -529,7 +529,7 @@ class HDFStore: >>> store.close() # only now, data is written to disk """ - _handle: Optional[File] + _handle: File | None _mode: str _complevel: int _fletcher32: bool @@ -538,7 +538,7 @@ def __init__( self, path, mode: str = "a", - complevel: Optional[int] = None, + complevel: int | None = None, complib=None, fletcher32: bool = False, **kwargs, @@ -626,7 +626,7 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def keys(self, include: str = "pandas") -> List[str]: + def keys(self, include: str = "pandas") -> list[str]: """ Return a list of keys corresponding to objects stored in HDFStore. 
@@ -856,8 +856,8 @@ def select_as_coordinates( self, key: str, where=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): """ return the selection as an Index @@ -888,8 +888,8 @@ def select_column( self, key: str, column: str, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): """ return a single column from the table. This is generally only useful to @@ -1054,10 +1054,10 @@ def put( index=True, append=False, complib=None, - complevel: Optional[int] = None, - min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + complevel: int | None = None, + min_itemsize: int | dict[str, int] | None = None, nan_rep=None, - data_columns: Optional[List[str]] = None, + data_columns: list[str] | None = None, encoding=None, errors: str = "strict", track_times: bool = True, @@ -1180,14 +1180,14 @@ def append( index=True, append=True, complib=None, - complevel: Optional[int] = None, + complevel: int | None = None, columns=None, - min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + min_itemsize: int | dict[str, int] | None = None, nan_rep=None, chunksize=None, expectedrows=None, - dropna: Optional[bool] = None, - data_columns: Optional[List[str]] = None, + dropna: bool | None = None, + data_columns: list[str] | None = None, encoding=None, errors: str = "strict", ): @@ -1258,7 +1258,7 @@ def append( def append_to_multiple( self, - d: Dict, + d: dict, value, selector, data_columns=None, @@ -1309,7 +1309,7 @@ def append_to_multiple( # figure out how to split the value remain_key = None - remain_values: List = [] + remain_values: list = [] for k, v in d.items(): if v is None: if remain_key is not None: @@ -1357,8 +1357,8 @@ def create_table_index( self, key: str, columns=None, - optlevel: Optional[int] = None, - kind: Optional[str] = None, + optlevel: int | None = None, + kind: str | None = None, ): """ Create a pytables index on the table. 
@@ -1471,7 +1471,7 @@ def walk(self, where="/"): yield (g._v_pathname.rstrip("/"), groups, leaves) - def get_node(self, key: str) -> Optional[Node]: + def get_node(self, key: str) -> Node | None: """ return the node with the key or None if it does not exist """ self._check_if_open() if not key.startswith("/"): @@ -1487,7 +1487,7 @@ def get_node(self, key: str) -> Optional[Node]: assert isinstance(node, _table_mod.Node), type(node) return node - def get_storer(self, key: str) -> Union[GenericFixed, Table]: + def get_storer(self, key: str) -> GenericFixed | Table: """ return the storer object for a key, raise if not in the file """ group = self.get_node(key) if group is None: @@ -1504,7 +1504,7 @@ def copy( propindexes: bool = True, keys=None, complib=None, - complevel: Optional[int] = None, + complevel: int | None = None, fletcher32: bool = False, overwrite=True, ): @@ -1543,7 +1543,7 @@ def copy( data = self.select(k) if isinstance(s, Table): - index: Union[bool, List[str]] = False + index: bool | list[str] = False if propindexes: index = [a.name for a in s.axes if a.is_indexed] new_store.append( @@ -1618,12 +1618,12 @@ def _create_storer( self, group, format=None, - value: Optional[FrameOrSeries] = None, + value: FrameOrSeries | None = None, encoding: str = "UTF-8", errors: str = "strict", - ) -> Union[GenericFixed, Table]: + ) -> GenericFixed | Table: """ return a suitable class to operate """ - cls: Union[Type[GenericFixed], Type[Table]] + cls: type[GenericFixed] | type[Table] if value is not None and not isinstance(value, (Series, DataFrame)): raise TypeError("value must be None, Series, or DataFrame") @@ -1715,9 +1715,9 @@ def _write_to_group( index=True, append=False, complib=None, - complevel: Optional[int] = None, + complevel: int | None = None, fletcher32=None, - min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + min_itemsize: int | dict[str, int] | None = None, chunksize=None, expectedrows=None, dropna=False, @@ -1833,21 +1833,21 @@ class TableIterator: Whether to automatically close the store at the end of iteration. 
""" - chunksize: Optional[int] + chunksize: int | None store: HDFStore - s: Union[GenericFixed, Table] + s: GenericFixed | Table def __init__( self, store: HDFStore, - s: Union[GenericFixed, Table], + s: GenericFixed | Table, func, where, nrows, start=None, stop=None, iterator: bool = False, - chunksize: Optional[int] = None, + chunksize: int | None = None, auto_close: bool = False, ): self.store = store @@ -1952,7 +1952,7 @@ def __init__( values=None, kind=None, typ=None, - cname: Optional[str] = None, + cname: str | None = None, axis=None, pos=None, freq=None, @@ -2053,7 +2053,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): if self.freq is not None: kwargs["freq"] = _ensure_decoded(self.freq) - factory: Union[Type[Index], Type[DatetimeIndex]] = Index + factory: type[Index] | type[DatetimeIndex] = Index if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype): factory = DatetimeIndex @@ -2266,7 +2266,7 @@ def __init__( table=None, meta=None, metadata=None, - dtype: Optional[DtypeArg] = None, + dtype: DtypeArg | None = None, data=None, ): super().__init__( @@ -2363,7 +2363,7 @@ def get_atom_string(cls, shape, itemsize): return _tables().StringCol(itemsize=itemsize, shape=shape[0]) @classmethod - def get_atom_coltype(cls, kind: str) -> Type[Col]: + def get_atom_coltype(cls, kind: str) -> type[Col]: """ return the PyTables column class for this column """ if kind.startswith("uint"): k4 = kind[4:] @@ -2568,7 +2568,7 @@ class Fixed: pandas_kind: str format_type: str = "fixed" # GH#30962 needed by dask - obj_type: Type[FrameOrSeriesUnion] + obj_type: type[FrameOrSeriesUnion] ndim: int encoding: str parent: HDFStore @@ -2596,7 +2596,7 @@ def is_old_version(self) -> bool: return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1 @property - def version(self) -> Tuple[int, int, int]: + def version(self) -> tuple[int, int, int]: """ compute and set our version """ version = _ensure_decoded(getattr(self.group._v_attrs, "pandas_version", None)) try: @@ -2705,8 +2705,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): raise NotImplementedError( "cannot read on an abstract storer: subclasses should implement" @@ -2718,7 +2718,7 @@ def write(self, **kwargs): ) def delete( - self, where=None, start: Optional[int] = None, stop: Optional[int] = None + self, where=None, start: int | None = None, stop: int | None = None ): """ support fully deleting the node in its entirety (only) - where @@ -2736,7 +2736,7 @@ class GenericFixed(Fixed): _index_type_map = {DatetimeIndex: "datetime", PeriodIndex: "period"} _reverse_index_map = {v: k for k, v in _index_type_map.items()} - attributes: List[str] = [] + attributes: list[str] = [] # indexer helpers def _class_to_alias(self, cls) -> str: @@ -2829,7 +2829,7 @@ def write(self, obj, **kwargs): self.set_attrs() def read_array( - self, key: str, start: Optional[int] = None, stop: Optional[int] = None + self, key: str, start: int | None = None, stop: int | None = None ): """ read an array for the specified node (off of group """ import tables @@ -2865,7 +2865,7 @@ def read_array( return ret def read_index( - self, key: str, start: Optional[int] = None, stop: Optional[int] = None + self, key: str, start: int | None = None, stop: int | None = None ) -> Index: variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety")) @@ -2927,13 +2927,13 @@ def write_multi_index(self, key: str, index: 
MultiIndex): self.write_array(label_key, level_codes) def read_multi_index( - self, key: str, start: Optional[int] = None, stop: Optional[int] = None + self, key: str, start: int | None = None, stop: int | None = None ) -> MultiIndex: nlevels = getattr(self.attrs, f"{key}_nlevels") levels = [] codes = [] - names: List[Hashable] = [] + names: list[Hashable] = [] for i in range(nlevels): level_key = f"{key}_level{i}" node = getattr(self.group, level_key) @@ -2950,7 +2950,7 @@ def read_multi_index( ) def read_index_node( - self, node: Node, start: Optional[int] = None, stop: Optional[int] = None + self, node: Node, start: int | None = None, stop: int | None = None ) -> Index: data = node[start:stop] # If the index was an empty array write_array_empty() will @@ -2996,7 +2996,7 @@ def write_array_empty(self, key: str, value: ArrayLike): node._v_attrs.value_type = str(value.dtype) node._v_attrs.shape = value.shape - def write_array(self, key: str, obj: FrameOrSeries, items: Optional[Index] = None): + def write_array(self, key: str, obj: FrameOrSeries, items: Index | None = None): # TODO: we only have a few tests that get here, the only EA # that gets passed is DatetimeArray, and we never have # both self._filters and EA @@ -3095,8 +3095,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): self.validate_read(columns, where) index = self.read_index("index", start=start, stop=stop) @@ -3116,7 +3116,7 @@ class BlockManagerFixed(GenericFixed): nblocks: int @property - def shape(self) -> Optional[Shape]: + def shape(self) -> Shape | None: try: ndim = self.ndim @@ -3146,8 +3146,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): # start, stop applied to rows, so 0th axis only self.validate_read(columns, where) @@ -3233,15 +3233,15 @@ class Table(Fixed): pandas_kind = "wide_table" format_type: str = "table" # GH#30962 needed by dask table_type: str - levels: Union[int, List[Hashable]] = 1 + levels: int | list[Hashable] = 1 is_table = True - index_axes: List[IndexCol] - non_index_axes: List[Tuple[int, Any]] - values_axes: List[DataCol] - data_columns: List - metadata: List - info: Dict + index_axes: list[IndexCol] + non_index_axes: list[tuple[int, Any]] + values_axes: list[DataCol] + data_columns: list + metadata: list + info: dict def __init__( self, @@ -3331,7 +3331,7 @@ def is_multi_index(self) -> bool: def validate_multiindex( self, obj: FrameOrSeriesUnion - ) -> Tuple[DataFrame, List[Hashable]]: + ) -> tuple[DataFrame, list[Hashable]]: """ validate that we can store the multi-index; reset and return the new object @@ -3398,7 +3398,7 @@ def data_orientation(self): ) ) - def queryables(self) -> Dict[str, Any]: + def queryables(self) -> dict[str, Any]: """ return a dict of the kinds allowable columns for this object """ # mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here axis_names = {0: "index", 1: "columns"} @@ -3419,7 +3419,7 @@ def index_cols(self): # Note: each `i.cname` below is assured to be a str. 
return [(i.axis, i.cname) for i in self.index_axes] - def values_cols(self) -> List[str]: + def values_cols(self) -> list[str]: """ return a list of my values cols """ return [i.cname for i in self.values_axes] @@ -3474,7 +3474,7 @@ def get_attrs(self): self.nan_rep = getattr(self.attrs, "nan_rep", None) self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) - self.levels: List[Hashable] = getattr(self.attrs, "levels", None) or [] + self.levels: list[Hashable] = getattr(self.attrs, "levels", None) or [] self.index_axes = [a for a in self.indexables if a.is_an_indexable] self.values_axes = [a for a in self.indexables if not a.is_an_indexable] @@ -3581,7 +3581,7 @@ def f(i, c): return _indexables - def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None): + def create_index(self, columns=None, optlevel=None, kind: str | None = None): """ Create a pytables index on the specified columns. @@ -3666,8 +3666,8 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None): ) def _read_axes( - self, where, start: Optional[int] = None, stop: Optional[int] = None - ) -> List[Tuple[ArrayLike, ArrayLike]]: + self, where, start: int | None = None, stop: int | None = None + ) -> list[tuple[ArrayLike, ArrayLike]]: """ Create the axes sniffed from the table. @@ -3808,7 +3808,7 @@ def _create_axes( ) # create according to the new data - new_non_index_axes: List = [] + new_non_index_axes: list = [] # nan_representation if nan_rep is None: @@ -3891,7 +3891,7 @@ def _create_axes( # make sure that we match up the existing columns # if we have an existing table - existing_col: Optional[DataCol] + existing_col: DataCol | None if table_exists and validate: try: @@ -3989,8 +3989,8 @@ def get_blk_items(mgr): mgr = frame._mgr mgr = cast(BlockManager, mgr) - blocks: List[Block] = list(mgr.blocks) - blk_items: List[Index] = get_blk_items(mgr) + blocks: list[Block] = list(mgr.blocks) + blk_items: list[Index] = get_blk_items(mgr) if len(data_columns): axis, axis_labels = new_non_index_axes[0] @@ -4010,7 +4010,7 @@ def get_blk_items(mgr): tuple(b_items.tolist()): (b, b_items) for b, b_items in zip(blocks, blk_items) } - new_blocks: List["Block"] = [] + new_blocks: list["Block"] = [] new_blk_items = [] for ea in values_axes: items = tuple(ea.values) @@ -4090,10 +4090,10 @@ def process_filter(field, filt): def create_description( self, complib, - complevel: Optional[int], + complevel: int | None, fletcher32: bool, - expectedrows: Optional[int], - ) -> Dict[str, Any]: + expectedrows: int | None, + ) -> dict[str, Any]: """ create the description of the table from the axes & values """ # provided expected rows if its passed if expectedrows is None: @@ -4119,7 +4119,7 @@ def create_description( return d def read_coordinates( - self, where=None, start: Optional[int] = None, stop: Optional[int] = None + self, where=None, start: int | None = None, stop: int | None = None ): """ select coordinates (row numbers) from a table; return the @@ -4148,8 +4148,8 @@ def read_column( self, column: str, where=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): """ return a single column from the table, generally only indexables @@ -4201,8 +4201,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): """ read the indices and the 
indexing array, calculate offset rows and return @@ -4283,7 +4283,7 @@ def write( # add the rows table.write_data(chunksize, dropna=dropna) - def write_data(self, chunksize: Optional[int], dropna: bool = False): + def write_data(self, chunksize: int | None, dropna: bool = False): """ we form the data into a 2-d including indexes,values,mask write chunk-by-chunk """ @@ -4345,9 +4345,9 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False): def write_data_chunk( self, rows: np.ndarray, - indexes: List[np.ndarray], - mask: Optional[np.ndarray], - values: List[np.ndarray], + indexes: list[np.ndarray], + mask: np.ndarray | None, + values: list[np.ndarray], ): """ Parameters @@ -4387,7 +4387,7 @@ def write_data_chunk( self.table.flush() def delete( - self, where=None, start: Optional[int] = None, stop: Optional[int] = None + self, where=None, start: int | None = None, stop: int | None = None ): # delete all rows (and return the nrows) @@ -4455,7 +4455,7 @@ class AppendableFrameTable(AppendableTable): pandas_kind = "frame_table" table_type = "appendable_frame" ndim = 2 - obj_type: Type[FrameOrSeriesUnion] = DataFrame + obj_type: type[FrameOrSeriesUnion] = DataFrame @property def is_transposed(self) -> bool: @@ -4472,8 +4472,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): # validate the version @@ -4576,8 +4576,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ) -> Series: is_multi_index = self.is_multi_index @@ -4622,7 +4622,7 @@ class GenericTable(AppendableFrameTable): table_type = "generic_table" ndim = 2 obj_type = DataFrame - levels: List[Hashable] + levels: list[Hashable] @property def pandas_type(self) -> str: @@ -4656,7 +4656,7 @@ def indexables(self): name="index", axis=0, table=self.table, meta=meta, metadata=md ) - _indexables: List[Union[GenericIndexCol, GenericDataIndexableCol]] = [index_col] + _indexables: list[GenericIndexCol | GenericDataIndexableCol] = [index_col] for i, n in enumerate(d._v_names): assert isinstance(n, str) @@ -4709,8 +4709,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): df = super().read(where=where, columns=columns, start=start, stop=stop) @@ -4739,7 +4739,7 @@ def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataF if other is not None: labels = ensure_index(other.unique()).intersection(labels, sort=False) if not labels.equals(ax): - slicer: List[Union[slice, Index]] = [slice(None, None)] * obj.ndim + slicer: list[slice | Index] = [slice(None, None)] * obj.ndim slicer[axis] = labels obj = obj.loc[tuple(slicer)] return obj @@ -4748,17 +4748,17 @@ def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataF # tz to/from coercion -def _get_tz(tz: tzinfo) -> Union[str, tzinfo]: +def _get_tz(tz: tzinfo) -> str | tzinfo: """ for a tz-aware type, return an encoded zone """ zone = timezones.get_timezone(tz) return zone def _set_tz( - values: Union[np.ndarray, Index], - tz: Optional[Union[str, tzinfo]], + values: np.ndarray | Index, + tz: str | tzinfo | None, coerce: bool = False, -) -> Union[np.ndarray, DatetimeIndex]: +) -> np.ndarray | DatetimeIndex: """ coerce the values to a DatetimeIndex if tz is set preserve the input shape if possible @@ -4853,8 +4853,8 @@ 
def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index def _unconvert_index( data, kind: str, encoding: str, errors: str -) -> Union[np.ndarray, Index]: - index: Union[Index, np.ndarray] +) -> np.ndarray | Index: + index: Index | np.ndarray if kind == "datetime64": index = DatetimeIndex(data) @@ -4886,7 +4886,7 @@ def _maybe_convert_for_string_atom( nan_rep, encoding, errors, - columns: List[str], + columns: list[str], ): bvalues = block.values @@ -4908,7 +4908,7 @@ def _maybe_convert_for_string_atom( elif not (inferred_type == "string" or dtype_name == "object"): return bvalues - blocks: List[Block] = block.fillna(nan_rep, downcast=False) + blocks: list[Block] = block.fillna(nan_rep, downcast=False) # Note: because block is always object dtype, fillna goes # through a path such that the result is always a 1-element list assert len(blocks) == 1 @@ -5146,8 +5146,8 @@ def __init__( self, table: Table, where=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): self.table = table self.where = where diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 8888be02dd5ea..236c1d8990685 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -38,9 +38,9 @@ def __exit__(self, exc_type, exc_value, traceback): @overload def read_sas( filepath_or_buffer: FilePathOrBuffer, - format: Optional[str] = ..., - index: Optional[Hashable] = ..., - encoding: Optional[str] = ..., + format: str | None = ..., + index: Hashable | None = ..., + encoding: str | None = ..., chunksize: int = ..., iterator: bool = ..., ) -> ReaderBase: @@ -50,23 +50,23 @@ def read_sas( @overload def read_sas( filepath_or_buffer: FilePathOrBuffer, - format: Optional[str] = ..., - index: Optional[Hashable] = ..., - encoding: Optional[str] = ..., + format: str | None = ..., + index: Hashable | None = ..., + encoding: str | None = ..., chunksize: None = ..., iterator: bool = ..., -) -> Union[DataFrame, ReaderBase]: +) -> DataFrame | ReaderBase: ... def read_sas( filepath_or_buffer: FilePathOrBuffer, - format: Optional[str] = None, - index: Optional[Hashable] = None, - encoding: Optional[str] = None, - chunksize: Optional[int] = None, + format: str | None = None, + index: Hashable | None = None, + encoding: str | None = None, + chunksize: int | None = None, iterator: bool = False, -) -> Union[DataFrame, ReaderBase]: +) -> DataFrame | ReaderBase: """ Read SAS files stored as either XPORT or SAS7BDAT format files. 
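Every hunk in this patch applies the same mechanical rewrite: the autoupdated pre-commit hook (presumably pyupgrade or a similar fixer, given the .pre-commit-config.yaml change above) replaces typing.Optional[X] and Union[X, Y] with PEP 604 unions, and typing.List/Dict/Tuple/Type with the PEP 585 builtin generics. On the Python 3.7/3.8 interpreters pandas still supported at the time, the new spellings are legal only inside annotations, and only where the module enables PEP 563 postponed evaluation via "from __future__ import annotations" (presumably added wherever it was missing). A minimal before/after sketch; read_rows and its parameters are invented for illustration and do not appear in the patch:

from __future__ import annotations  # PEP 563: annotations stay unevaluated strings

from collections.abc import Sequence


def read_rows(
    path: str,
    start: int | None = None,              # was: Optional[int]
    columns: Sequence[str] | None = None,  # was: Optional[Sequence[str]]
) -> list[dict[str, object]]:              # was: List[Dict[str, object]]
    """Hypothetical reader, present only to show the annotation rewrite."""
    raise NotImplementedError

Without the future import, "int | None" raises TypeError at import time on Python < 3.10 and "list[dict[str, object]]" fails on < 3.9; in positions that are evaluated at runtime (for example the first argument to typing.cast), the old typing spellings would still be required on those interpreters.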
diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 9a5c9e4a2e2b2..92b34b96049e0 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -629,12 +629,12 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"): self.value_labels = list(zip(np.arange(len(categories)), categories)) self.value_labels.sort(key=lambda x: x[0]) self.text_len = 0 - self.txt: List[bytes] = [] + self.txt: list[bytes] = [] self.n = 0 # Compute lengths and setup lists of offsets and labels - offsets: List[int] = [] - values: List[int] = [] + offsets: list[int] = [] + values: list[int] = [] for vl in self.value_labels: category = vl[1] if not isinstance(category, str): @@ -754,7 +754,7 @@ class StataMissingValue: """ # Construct a dictionary of missing values - MISSING_VALUES: Dict[float, str] = {} + MISSING_VALUES: dict[float, str] = {} bases = (101, 32741, 2147483621) for b in bases: # Conversion to long to avoid hash issues on 32 bit platforms #8968 @@ -790,7 +790,7 @@ class StataMissingValue: "float64": struct.unpack("<d", float64_base)[0], } @property def string(self) -> str: return self._str @property - def value(self) -> Union[int, float]: + def value(self) -> int | float: """ The binary representation of the missing value. @@ -834,7 +834,7 @@ def __eq__(self, other: Any) -> bool: ) @classmethod - def get_base_missing_value(cls, dtype: np.dtype) -> Union[int, float]: + def get_base_missing_value(cls, dtype: np.dtype) -> int | float: if dtype == np.int8: value = cls.BASE_MISSING_VALUES["int8"] elif dtype == np.int16: @@ -1020,16 +1020,16 @@ def __init__( path_or_buf: FilePathOrBuffer, convert_dates: bool = True, convert_categoricals: bool = True, - index_col: Optional[str] = None, + index_col: str | None = None, convert_missing: bool = False, preserve_dtypes: bool = True, - columns: Optional[Sequence[str]] = None, + columns: Sequence[str] | None = None, order_categoricals: bool = True, - chunksize: Optional[int] = None, + chunksize: int | None = None, storage_options: StorageOptions = None, ): super().__init__() - self.col_sizes: List[int] = [] + self.col_sizes: list[int] = [] # Arguments to the reader (can be temporarily overridden in # calls to read). @@ -1055,7 +1055,7 @@ def __init__( self._column_selector_set = False self._value_labels_read = False self._data_read = False - self._dtype: Optional[np.dtype] = None + self._dtype: np.dtype | None = None self._lines_read = 0 self._native_byteorder = _set_endianness(sys.byteorder) @@ -1184,7 +1184,7 @@ def _read_new_header(self) -> None: # Get data type information, works for versions 117-119.
def _get_dtypes( self, seek_vartypes: int - ) -> Tuple[List[Union[int, str]], List[Union[str, np.dtype]]]: + ) -> tuple[list[int | str], list[str | np.dtype]]: self.path_or_buf.seek(seek_vartypes) raw_typlist = [ @@ -1192,7 +1192,7 @@ def _get_dtypes( for _ in range(self.nvar) ] - def f(typ: int) -> Union[int, str]: + def f(typ: int) -> int | str: if typ <= 2045: return typ try: @@ -1202,7 +1202,7 @@ def f(typ: int) -> Union[int, str]: typlist = [f(x) for x in raw_typlist] - def g(typ: int) -> Union[str, np.dtype]: + def g(typ: int) -> str | np.dtype: if typ <= 2045: return str(typ) try: @@ -1214,13 +1214,13 @@ def g(typ: int) -> Union[str, np.dtype]: return typlist, dtyplist - def _get_varlist(self) -> List[str]: + def _get_varlist(self) -> list[str]: # 33 in order formats, 129 in formats 118 and 119 b = 33 if self.format_version < 118 else 129 return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] # Returns the format list - def _get_fmtlist(self) -> List[str]: + def _get_fmtlist(self) -> list[str]: if self.format_version >= 118: b = 57 elif self.format_version > 113: @@ -1233,7 +1233,7 @@ def _get_fmtlist(self) -> List[str]: return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] # Returns the label list - def _get_lbllist(self) -> List[str]: + def _get_lbllist(self) -> list[str]: if self.format_version >= 118: b = 129 elif self.format_version > 108: @@ -1242,7 +1242,7 @@ def _get_lbllist(self) -> List[str]: b = 9 return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] - def _get_variable_labels(self) -> List[str]: + def _get_variable_labels(self) -> list[str]: if self.format_version >= 118: vlblist = [ self._decode(self.path_or_buf.read(321)) for _ in range(self.nvar) @@ -1401,7 +1401,7 @@ def _setup_dtype(self) -> np.dtype: return self._dtype - def _calcsize(self, fmt: Union[int, str]) -> int: + def _calcsize(self, fmt: int | str) -> int: if isinstance(fmt, int): return fmt return struct.calcsize(self.byteorder + fmt) @@ -1430,7 +1430,7 @@ def _read_value_labels(self) -> None: if self.format_version <= 108: # Value labels are not supported in version 108 and earlier. self._value_labels_read = True - self.value_label_dict: Dict[str, Dict[Union[float, int], str]] = {} + self.value_label_dict: dict[str, dict[float | int, str]] = {} return if self.format_version >= 117: @@ -1512,7 +1512,7 @@ def __next__(self) -> DataFrame: self._using_iterator = True return self.read(nrows=self._chunksize) - def get_chunk(self, size: Optional[int] = None) -> DataFrame: + def get_chunk(self, size: int | None = None) -> DataFrame: """ Reads lines from Stata file and returns as dataframe @@ -1532,14 +1532,14 @@ def get_chunk(self, size: Optional[int] = None) -> DataFrame: @Appender(_read_method_doc) def read( self, - nrows: Optional[int] = None, - convert_dates: Optional[bool] = None, - convert_categoricals: Optional[bool] = None, - index_col: Optional[str] = None, - convert_missing: Optional[bool] = None, - preserve_dtypes: Optional[bool] = None, - columns: Optional[Sequence[str]] = None, - order_categoricals: Optional[bool] = None, + nrows: int | None = None, + convert_dates: bool | None = None, + convert_categoricals: bool | None = None, + index_col: str | None = None, + convert_missing: bool | None = None, + preserve_dtypes: bool | None = None, + columns: Sequence[str] | None = None, + order_categoricals: bool | None = None, ) -> DataFrame: # Handle empty file or chunk. If reading incrementally raise # StopIteration. 
If reading the whole thing return an empty @@ -1782,7 +1782,7 @@ def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFra def _do_convert_categoricals( self, data: DataFrame, - value_label_dict: Dict[str, Dict[Union[float, int], str]], + value_label_dict: dict[str, dict[float | int, str]], lbllist: Sequence[str], order_categoricals: bool, ) -> DataFrame: @@ -1799,7 +1799,7 @@ def _do_convert_categoricals( column = data[col] key_matches = column.isin(keys) if self._using_iterator and key_matches.all(): - initial_categories: Optional[np.ndarray] = keys + initial_categories: np.ndarray | None = keys # If all categories are in the keys and we are iterating, # use the same keys for all chunks. If some are missing # value labels, then we will fall back to the categories @@ -1860,7 +1860,7 @@ def data_label(self) -> str: """ return self._data_label - def variable_labels(self) -> Dict[str, str]: + def variable_labels(self) -> dict[str, str]: """ Return variable labels as a dict, associating each variable name with corresponding label. @@ -1871,7 +1871,7 @@ def variable_labels(self) -> Dict[str, str]: """ return dict(zip(self.varlist, self._variable_labels)) - def value_labels(self) -> Dict[str, Dict[Union[float, int], str]]: + def value_labels(self) -> dict[str, dict[float | int, str]]: """ Return a dict, associating each variable name a dict, associating each value its corresponding label. @@ -1891,15 +1891,15 @@ def read_stata( filepath_or_buffer: FilePathOrBuffer, convert_dates: bool = True, convert_categoricals: bool = True, - index_col: Optional[str] = None, + index_col: str | None = None, convert_missing: bool = False, preserve_dtypes: bool = True, - columns: Optional[Sequence[str]] = None, + columns: Sequence[str] | None = None, order_categoricals: bool = True, - chunksize: Optional[int] = None, + chunksize: int | None = None, iterator: bool = False, storage_options: StorageOptions = None, -) -> Union[DataFrame, StataReader]: +) -> DataFrame | StataReader: reader = StataReader( filepath_or_buffer, @@ -1964,7 +1964,7 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype: raise NotImplementedError(f"Format {fmt} not implemented") -def _maybe_convert_to_int_keys(convert_dates: Dict, varlist: List[Hashable]) -> Dict: +def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict: new_dict = {} for key in convert_dates: if not convert_dates[key].startswith("%"): # make sure proper fmts @@ -2149,12 +2149,12 @@ def __init__( self, fname: FilePathOrBuffer, data: DataFrame, - convert_dates: Optional[Dict[Hashable, str]] = None, + convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, - byteorder: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, - data_label: Optional[str] = None, - variable_labels: Optional[Dict[Hashable, str]] = None, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): @@ -2165,7 +2165,7 @@ def __init__( self._data_label = data_label self._variable_labels = variable_labels self._compression = compression - self._output_file: Optional[Buffer] = None + self._output_file: Buffer | None = None # attach nobs, nvars, data, varlist, typlist self._prepare_pandas(data) self.storage_options = storage_options @@ -2175,7 +2175,7 @@ def __init__( self._byteorder = _set_endianness(byteorder) self._fname 
= fname self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} - self._converted_names: Dict[Hashable, str] = {} + self._converted_names: dict[Hashable, str] = {} def _write(self, to_write: str) -> None: """ @@ -2198,7 +2198,7 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame: """ is_cat = [is_categorical_dtype(data[col].dtype) for col in data] self._is_col_cat = is_cat - self._value_labels: List[StataValueLabel] = [] + self._value_labels: list[StataValueLabel] = [] if not any(is_cat): return data @@ -2297,7 +2297,7 @@ def _check_column_names(self, data: DataFrame) -> DataFrame: dates are exported, the variable name is propagated to the date conversion dictionary """ - converted_names: Dict[Hashable, str] = {} + converted_names: dict[Hashable, str] = {} columns = list(data.columns) original_columns = columns[:] @@ -2354,8 +2354,8 @@ def _check_column_names(self, data: DataFrame) -> DataFrame: return data def _set_formats_and_types(self, dtypes: Series) -> None: - self.fmtlist: List[str] = [] - self.typlist: List[int] = [] + self.fmtlist: list[str] = [] + self.typlist: list[int] = [] for col, dtype in dtypes.items(): self.fmtlist.append(_dtype_to_default_stata_fmt(dtype, self.data[col])) self.typlist.append(_dtype_to_stata_type(dtype, self.data[col])) @@ -2541,8 +2541,8 @@ def _write_value_labels(self) -> None: def _write_header( self, - data_label: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, + data_label: str | None = None, + time_stamp: datetime.datetime | None = None, ) -> None: byteorder = self._byteorder # ds_format - just use 114 @@ -2742,7 +2742,7 @@ def _dtype_to_stata_type_117(dtype: np.dtype, column: Series, force_strl: bool) raise NotImplementedError(f"Data type {dtype} not supported.") -def _pad_bytes_new(name: Union[str, bytes], length: int) -> bytes: +def _pad_bytes_new(name: str | bytes, length: int) -> bytes: """ Takes a bytes instance and pads it with null bytes until it's length chars. """ @@ -2785,7 +2785,7 @@ def __init__( df: DataFrame, columns: Sequence[str], version: int = 117, - byteorder: Optional[str] = None, + byteorder: str | None = None, ): if version not in (117, 118, 119): raise ValueError("Only dta versions 117, 118 and 119 supported") @@ -2813,11 +2813,11 @@ def __init__( self._gso_o_type = gso_o_type self._gso_v_type = gso_v_type - def _convert_key(self, key: Tuple[int, int]) -> int: + def _convert_key(self, key: tuple[int, int]) -> int: v, o = key return v + self._o_offet * o - def generate_table(self) -> Tuple[Dict[str, Tuple[int, int]], DataFrame]: + def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]: """ Generates the GSO lookup table for the DataFrame @@ -2868,7 +2868,7 @@ def generate_table(self) -> Tuple[Dict[str, Tuple[int, int]], DataFrame]: return gso_table, gso_df - def generate_blob(self, gso_table: Dict[str, Tuple[int, int]]) -> bytes: + def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes: """ Generates the binary blob of GSOs that is written to the dta file. 
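A caveat the _pad_bytes_new signature above illustrates: the union lives only in the annotation. Under PEP 563 it is never evaluated, but runtime type checks on Python < 3.10 still need a class or tuple of classes, so the body keeps a plain isinstance test. A sketch under those assumptions; pad_bytes is a hypothetical stand-in, not the pandas implementation:

from __future__ import annotations


def pad_bytes(name: str | bytes, length: int) -> bytes:
    # isinstance(name, str | bytes) would raise TypeError before Python 3.10,
    # so the runtime check stays an ordinary class test.
    if isinstance(name, str):
        name = name.encode("utf-8")
    return name + b"\x00" * (length - len(name))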
@@ -3025,18 +3025,18 @@ def __init__( self, fname: FilePathOrBuffer, data: DataFrame, - convert_dates: Optional[Dict[Hashable, str]] = None, + convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, - byteorder: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, - data_label: Optional[str] = None, - variable_labels: Optional[Dict[Hashable, str]] = None, - convert_strl: Optional[Sequence[Hashable]] = None, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + convert_strl: Sequence[Hashable] | None = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): # Copy to new list since convert_strl might be modified later - self._convert_strl: List[Hashable] = [] + self._convert_strl: list[Hashable] = [] if convert_strl is not None: self._convert_strl.extend(convert_strl) @@ -3052,11 +3052,11 @@ def __init__( compression=compression, storage_options=storage_options, ) - self._map: Dict[str, int] = {} + self._map: dict[str, int] = {} self._strl_blob = b"" @staticmethod - def _tag(val: Union[str, bytes], tag: str) -> bytes: + def _tag(val: str | bytes, tag: str) -> bytes: """Surround val with <tag></tag>""" if isinstance(val, str): val = bytes(val, "utf-8") @@ -3069,8 +3069,8 @@ def _update_map(self, tag: str) -> None: def _write_header( self, - data_label: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, + data_label: str | None = None, + time_stamp: datetime.datetime | None = None, ) -> None: """Write the file header""" byteorder = self._byteorder @@ -3417,14 +3417,14 @@ def __init__( self, fname: FilePathOrBuffer, data: DataFrame, - convert_dates: Optional[Dict[Hashable, str]] = None, + convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, - byteorder: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, - data_label: Optional[str] = None, - variable_labels: Optional[Dict[Hashable, str]] = None, - convert_strl: Optional[Sequence[Hashable]] = None, - version: Optional[int] = None, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + convert_strl: Sequence[Hashable] | None = None, + version: int | None = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 597217ec67b0e..4b5acf0ef2df2 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -22,13 +22,13 @@ def hist_series( by=None, ax=None, grid: bool = True, - xlabelsize: Optional[int] = None, - xrot: Optional[float] = None, - ylabelsize: Optional[int] = None, - yrot: Optional[float] = None, - figsize: Optional[Tuple[int, int]] = None, - bins: Union[int, Sequence[int]] = 10, - backend: Optional[str] = None, + xlabelsize: int | None = None, + xrot: float | None = None, + ylabelsize: int | None = None, + yrot: float | None = None, + figsize: tuple[int, int] | None = None, + bins: int | Sequence[int] = 10, + backend: str | None = None, legend: bool = False, **kwargs, ): @@ -105,17 +105,17 @@ def hist_frame( column: IndexLabel = None, by=None, grid: bool = True, - xlabelsize: Optional[int] = None, - xrot: Optional[float] = None, - ylabelsize: Optional[int] = None, - yrot: Optional[float] = None, + xlabelsize: int | None = None, + xrot: float | None = None, + ylabelsize: int
| None = None, + yrot: float | None = None, ax=None, sharex: bool = False, sharey: bool = False, - figsize: Optional[Tuple[int, int]] = None, - layout: Optional[Tuple[int, int]] = None, - bins: Union[int, Sequence[int]] = 10, - backend: Optional[str] = None, + figsize: tuple[int, int] | None = None, + layout: tuple[int, int] | None = None, + bins: int | Sequence[int] = 10, + backend: str | None = None, legend: bool = False, **kwargs, ): diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index e212127549355..fd74018dc1c41 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -33,7 +33,7 @@ if TYPE_CHECKING: from pandas.plotting._matplotlib.core import MPLPlot -PLOT_CLASSES: Dict[str, Type[MPLPlot]] = { +PLOT_CLASSES: dict[str, type[MPLPlot]] = { "line": LinePlot, "bar": BarPlot, "barh": BarhPlot, diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7d743075674f1..5f5191fe4e65e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -85,7 +85,7 @@ def _kind(self): _layout_type = "vertical" _default_rot = 0 - orientation: Optional[str] = None + orientation: str | None = None axes: np.ndarray # of Axes objects @@ -109,8 +109,8 @@ def __init__( ylim=None, xticks=None, yticks=None, - xlabel: Optional[Hashable] = None, - ylabel: Optional[Hashable] = None, + xlabel: Hashable | None = None, + ylabel: Hashable | None = None, sort_columns=False, fontsize=None, secondary_y=False, @@ -171,8 +171,8 @@ def __init__( self.grid = grid self.legend = legend - self.legend_handles: List[Artist] = [] - self.legend_labels: List[Hashable] = [] + self.legend_handles: list[Artist] = [] + self.legend_labels: list[Hashable] = [] self.logx = kwds.pop("logx", False) self.logy = kwds.pop("logy", False) @@ -560,7 +560,7 @@ def _apply_axis_properties(self, axis: Axis, rot=None, fontsize=None): label.set_fontsize(fontsize) @property - def legend_title(self) -> Optional[str]: + def legend_title(self) -> str | None: if not isinstance(self.data.columns, ABCMultiIndex): name = self.data.columns.name if name is not None: @@ -702,7 +702,7 @@ def _plot(cls, ax: Axes, x, y, style=None, is_errorbar: bool = False, **kwds): args = (x, y) # type: ignore[assignment] return ax.plot(*args, **kwds) - def _get_index_name(self) -> Optional[str]: + def _get_index_name(self) -> str | None: if isinstance(self.data.index, ABCMultiIndex): name = self.data.index.names if com.any_not_none(*name): @@ -909,7 +909,7 @@ def _get_subplots(self): ax for ax in self.axes[0].get_figure().get_axes() if isinstance(ax, Subplot) ] - def _get_axes_layout(self) -> Tuple[int, int]: + def _get_axes_layout(self) -> tuple[int, int]: axes = self._get_subplots() x_set = set() y_set = set() diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index 01ac3de4ff3bb..7954fb80d405e 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -131,7 +131,7 @@ def _get_marker_compat(marker): def radviz( frame: DataFrame, class_column, - ax: Optional[Axes] = None, + ax: Axes | None = None, color=None, colormap=None, **kwds, @@ -153,7 +153,7 @@ def normalize(series): ax.set_xlim(-1, 1) ax.set_ylim(-1, 1) - to_plot: Dict[Hashable, List[List]] = {} + to_plot: dict[Hashable, list[list]] = {} colors = get_standard_colors( num_colors=len(classes), colormap=colormap, color_type="random", color=color ) @@ -219,7 +219,7 @@ def normalize(series): def 
andrews_curves( frame: DataFrame, class_column, - ax: Optional[Axes] = None, + ax: Axes | None = None, samples: int = 200, color=None, colormap=None, @@ -257,7 +257,7 @@ def f(t): classes = frame[class_column].drop_duplicates() df = frame.drop(class_column, axis=1) t = np.linspace(-np.pi, np.pi, samples) - used_legends: Set[str] = set() + used_legends: set[str] = set() color_values = get_standard_colors( num_colors=len(classes), colormap=colormap, color_type="random", color=color @@ -285,7 +285,7 @@ def f(t): def bootstrap_plot( series: Series, - fig: Optional[Figure] = None, + fig: Figure | None = None, size: int = 50, samples: int = 500, **kwds, @@ -343,7 +343,7 @@ def parallel_coordinates( frame: DataFrame, class_column, cols=None, - ax: Optional[Axes] = None, + ax: Axes | None = None, color=None, use_columns=False, xticks=None, @@ -367,7 +367,7 @@ def parallel_coordinates( else: df = frame[cols] - used_legends: Set[str] = set() + used_legends: set[str] = set() ncols = len(df.columns) @@ -419,7 +419,7 @@ def parallel_coordinates( return ax -def lag_plot(series: Series, lag: int = 1, ax: Optional[Axes] = None, **kwds) -> Axes: +def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes: # workaround because `c='b'` is hardcoded in matplotlib's scatter method import matplotlib.pyplot as plt @@ -436,7 +436,7 @@ def lag_plot(series: Series, lag: int = 1, ax: Optional[Axes] = None, **kwds) -> return ax -def autocorrelation_plot(series: Series, ax: Optional[Axes] = None, **kwds) -> Axes: +def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwds) -> Axes: import matplotlib.pyplot as plt n = len(series) diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index 2c9aadd9573cf..f3695a1ca8e02 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -31,9 +31,9 @@ def get_standard_colors( num_colors: int, - colormap: Optional[Colormap] = None, + colormap: Colormap | None = None, color_type: str = "default", - color: Optional[Union[Dict[str, Color], Color, Collection[Color]]] = None, + color: dict[str, Color] | Color | Collection[Color] | None = None, ): """ Get standard colors based on `colormap`, `color_type` or `color` inputs. @@ -82,11 +82,11 @@ def get_standard_colors( def _derive_colors( *, - color: Optional[Union[Color, Collection[Color]]], - colormap: Optional[Union[str, Colormap]], + color: Color | Collection[Color] | None, + colormap: str | Colormap | None, color_type: str, num_colors: int, -) -> List[Color]: +) -> list[Color]: """ Derive colors from either `colormap`, `color_type` or `color` inputs. @@ -131,7 +131,7 @@ def _derive_colors( return _get_colors_from_color_type(color_type, num_colors=num_colors) -def _cycle_colors(colors: List[Color], num_colors: int) -> Iterator[Color]: +def _cycle_colors(colors: list[Color], num_colors: int) -> Iterator[Color]: """Cycle colors until achieving max of `num_colors` or length of `colors`. 
Extra colors will be ignored by matplotlib if there are more colors @@ -142,15 +142,15 @@ def _cycle_colors(colors: List[Color], num_colors: int) -> Iterator[Color]: def _get_colors_from_colormap( - colormap: Union[str, Colormap], + colormap: str | Colormap, num_colors: int, -) -> List[Color]: +) -> list[Color]: """Get colors from colormap.""" colormap = _get_cmap_instance(colormap) return [colormap(num) for num in np.linspace(0, 1, num=num_colors)] -def _get_cmap_instance(colormap: Union[str, Colormap]) -> Colormap: +def _get_cmap_instance(colormap: str | Colormap) -> Colormap: """Get instance of matplotlib colormap.""" if isinstance(colormap, str): cmap = colormap @@ -161,8 +161,8 @@ def _get_cmap_instance(colormap: Union[str, Colormap]) -> Colormap: def _get_colors_from_color( - color: Union[Color, Collection[Color]], -) -> List[Color]: + color: Color | Collection[Color], +) -> list[Color]: """Get colors from user input color.""" if len(color) == 0: raise ValueError(f"Invalid color argument: {color}") @@ -175,7 +175,7 @@ def _get_colors_from_color( return list(_gen_list_of_colors_from_iterable(color)) -def _is_single_color(color: Union[Color, Collection[Color]]) -> bool: +def _is_single_color(color: Color | Collection[Color]) -> bool: """Check if `color` is a single color, not a sequence of colors. Single color is of these kinds: @@ -208,7 +208,7 @@ def _gen_list_of_colors_from_iterable(color: Collection[Color]) -> Iterator[Colo raise ValueError(f"Invalid color {x}") -def _is_floats_color(color: Union[Color, Collection[Color]]) -> bool: +def _is_floats_color(color: Color | Collection[Color]) -> bool: """Check if color comprises a sequence of floats representing color.""" return bool( is_list_like(color) @@ -217,7 +217,7 @@ def _is_floats_color(color: Union[Color, Collection[Color]]) -> bool: ) -def _get_colors_from_color_type(color_type: str, num_colors: int) -> List[Color]: +def _get_colors_from_color_type(color_type: str, num_colors: int) -> list[Color]: """Get colors from user input color type.""" if color_type == "default": return _get_default_colors(num_colors) @@ -227,7 +227,7 @@ def _get_colors_from_color_type(color_type: str, num_colors: int) -> List[Color] raise ValueError("color_type must be either 'default' or 'random'") -def _get_default_colors(num_colors: int) -> List[Color]: +def _get_default_colors(num_colors: int) -> list[Color]: """Get `num_colors` of default colors from matplotlib rc params.""" import matplotlib.pyplot as plt @@ -235,12 +235,12 @@ def _get_default_colors(num_colors: int) -> List[Color]: return colors[0:num_colors] -def _get_random_colors(num_colors: int) -> List[Color]: +def _get_random_colors(num_colors: int) -> list[Color]: """Get `num_colors` of random colors.""" return [_random_color(num) for num in range(num_colors)] -def _random_color(column: int) -> List[float]: +def _random_color(column: int) -> list[float]: """Get a random color represented as a list of length 3""" # GH17525 use common._random_state to avoid resetting the seed rs = com.random_state(column) diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 51916075018a3..3166b9abef818 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -170,7 +170,7 @@ def _get_ax_freq(ax: Axes): return ax_freq -def _get_period_alias(freq) -> Optional[str]: +def _get_period_alias(freq) -> str | None: freqstr = to_offset(freq).rule_code freq = get_period_alias(freqstr) @@ -223,7 +223,7 @@ def 
use_dynamic_x(ax: Axes, data: FrameOrSeriesUnion) -> bool: return True -def _get_index_freq(index: Index) -> Optional[BaseOffset]: +def _get_index_freq(index: Index) -> BaseOffset | None: freq = getattr(index, "freq", None) if freq is None: freq = getattr(index, "inferred_freq", None) diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index df94b71f5e7a9..92c162fd01855 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -70,7 +70,7 @@ def table( return table -def _get_layout(nplots: int, layout=None, layout_type: str = "box") -> Tuple[int, int]: +def _get_layout(nplots: int, layout=None, layout_type: str = "box") -> tuple[int, int]: if layout is not None: if not isinstance(layout, (tuple, list)) or len(layout) != 2: raise ValueError("Layout must be a tuple of (rows, columns)") @@ -418,7 +418,7 @@ def handle_shared_axes( _remove_labels_from_axis(ax.yaxis) -def flatten_axes(axes: Union[Axes, Sequence[Axes]]) -> np.ndarray: +def flatten_axes(axes: Axes | Sequence[Axes]) -> np.ndarray: if not is_list_like(axes): return np.array([axes]) elif isinstance(axes, (np.ndarray, ABCIndex)): @@ -427,7 +427,7 @@ def flatten_axes(axes: Union[Axes, Sequence[Axes]]) -> np.ndarray: def set_ticks_props( - axes: Union[Axes, Sequence[Axes]], + axes: Axes | Sequence[Axes], xlabelsize=None, xrot=None, ylabelsize=None, @@ -447,7 +447,7 @@ def set_ticks_props( return axes -def get_all_lines(ax: Axes) -> List[Line2D]: +def get_all_lines(ax: Axes) -> list[Line2D]: lines = ax.get_lines() if hasattr(ax, "right_ax"): @@ -459,7 +459,7 @@ def get_all_lines(ax: Axes) -> List[Line2D]: return lines -def get_xlim(lines: Iterable[Line2D]) -> Tuple[float, float]: +def get_xlim(lines: Iterable[Line2D]) -> tuple[float, float]: left, right = np.inf, -np.inf for line in lines: x = line.get_xdata(orig=False) diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 27c8c8170e11b..dada27dc36718 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -36,7 +36,7 @@ class ArrowBoolDtype(ExtensionDtype): na_value = pa.NULL @classmethod - def construct_array_type(cls) -> Type[ArrowBoolArray]: + def construct_array_type(cls) -> type[ArrowBoolArray]: """ Return the array type associated with this dtype. @@ -60,7 +60,7 @@ class ArrowStringDtype(ExtensionDtype): na_value = pa.NULL @classmethod - def construct_array_type(cls) -> Type[ArrowStringArray]: + def construct_array_type(cls) -> type[ArrowStringArray]: """ Return the array type associated with this dtype. diff --git a/pandas/tests/extension/arrow/test_timestamp.py b/pandas/tests/extension/arrow/test_timestamp.py index bd661ad20bb02..e5e3de6de311f 100644 --- a/pandas/tests/extension/arrow/test_timestamp.py +++ b/pandas/tests/extension/arrow/test_timestamp.py @@ -24,7 +24,7 @@ class ArrowTimestampUSDtype(ExtensionDtype): na_value = pa.NULL @classmethod - def construct_array_type(cls) -> Type[ArrowTimestampUSArray]: + def construct_array_type(cls) -> type[ArrowTimestampUSArray]: """ Return the array type associated with this dtype. 
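Several plotting helpers above (flatten_axes, set_ticks_props) accept either one Axes or a sequence of them, exactly the kind of signature PEP 604 makes readable: Axes | Sequence[Axes]. A self-contained sketch of the same normalization pattern with matplotlib swapped out for arbitrary objects; flatten and its exact behavior are assumptions for illustration, not the pandas helper:

from __future__ import annotations

from collections.abc import Sequence
from typing import TypeVar

import numpy as np

T = TypeVar("T")


def flatten(items: T | Sequence[T]) -> np.ndarray:
    """Normalize one item or a sequence of items to a flat object ndarray."""
    if isinstance(items, np.ndarray):
        return items.ravel()
    if isinstance(items, Sequence) and not isinstance(items, (str, bytes)):
        return np.asarray(items, dtype=object).ravel()
    return np.array([items])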
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 4122fcaae496b..cf61f5d933324 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -33,7 +33,7 @@ def __repr__(self) -> str: return f"DecimalDtype(context={self.context})" @classmethod - def construct_array_type(cls) -> Type[DecimalArray]: + def construct_array_type(cls) -> type[DecimalArray]: """ Return the array type associated with this dtype. diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 5fcfe4faac55a..aeba7ed477308 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -36,7 +36,7 @@ class JSONDtype(ExtensionDtype): na_value: Mapping[str, Any] = UserDict() @classmethod - def construct_array_type(cls) -> Type[JSONArray]: + def construct_array_type(cls) -> type[JSONArray]: """ Return the array type associated with this dtype. diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index 998dad208033e..7562f750bf9d6 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -25,7 +25,7 @@ class ListDtype(ExtensionDtype): na_value = np.nan @classmethod - def construct_array_type(cls) -> Type[ListArray]: + def construct_array_type(cls) -> type[ListArray]: """ Return the array type associated with this dtype. diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 29c02916ec6e9..e66cba3936803 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -186,7 +186,7 @@ def _check_visible(self, collections, visible=True): assert patch.get_visible() == visible def _check_patches_all_filled( - self, axes: Union[Axes, Sequence[Axes]], filled: bool = True + self, axes: Axes | Sequence[Axes], filled: bool = True ) -> None: """ Check for each artist whether it is filled or not
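The extension-dtype hunks above all touch the same classmethod: construct_array_type now returns type[SomeArray], the PEP 585 spelling of typing.Type[SomeArray]. A stripped-down sketch of that pattern; MockDtype and MockArray are invented stand-ins, not pandas classes:

from __future__ import annotations


class MockArray:
    """Stand-in for an ExtensionArray subclass."""


class MockDtype:
    """Stand-in for an ExtensionDtype subclass."""

    @classmethod
    def construct_array_type(cls) -> type[MockArray]:
        # The builtin ``type`` is subscriptable in this annotation under
        # PEP 563/585, so the typing.Type import can be dropped.
        return MockArray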