Skip to content

Commit 548c198

Browse files
committed
make extension arrays work with new typing, fixing astype and to_numpy
1 parent 3c2c78b commit 548c198

File tree

27 files changed

+197
-173
lines changed

27 files changed

+197
-173
lines changed

pandas/_testing/asserters.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,9 +382,13 @@ def _get_ilevel_values(index, level):
382382
# skip exact index checking when `check_categorical` is False
383383
if check_exact and check_categorical:
384384
if not left.equals(right):
385-
diff = (
386-
np.sum((left._values != right._values).astype(int)) * 100.0 / len(left)
387-
)
385+
# error: Value of type variable "_Number" of "sum" cannot be
386+
# "Union[ExtensionArray, ndarray, Any]"
387+
thesum = np.sum(
388+
(left._values != right._values).astype(int)
389+
) # type: ignore[type-var]
390+
# error: Unsupported operand types for * ("ExtensionArray" and "float")
391+
diff = thesum * 100.0 / len(left) # type: ignore[operator]
388392
msg = f"{obj} values are different ({np.round(diff, 5)} %)"
389393
raise_assert_detail(obj, msg, left, right)
390394
else:

pandas/_typing.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,17 @@
7676

7777
ArrayLike = Union["ExtensionArray", np.ndarray]
7878
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
79-
79+
AnySequenceLike = Union[
80+
"ExtensionArray",
81+
"Index",
82+
"Series",
83+
Sequence[Any],
84+
np.ndarray,
85+
]
8086
# scalars
8187

8288
PythonScalar = Union[str, int, float, bool]
83-
DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", "Period", "Timestamp", "Timedelta")
89+
DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
8490
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
8591
Scalar = Union[PythonScalar, PandasScalar]
8692

pandas/core/algorithms.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@
3030
)
3131
from pandas._typing import (
3232
AnyArrayLike,
33+
AnySequenceLike,
3334
ArrayLike,
35+
Dtype,
3436
DtypeObj,
3537
FrameOrSeriesUnion,
3638
)
@@ -216,7 +218,7 @@ def _ensure_data(values: ArrayLike) -> Tuple[np.ndarray, DtypeObj]:
216218

217219

218220
def _reconstruct_data(
219-
values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike
221+
values: ArrayLike, dtype: Dtype, original: AnyArrayLike
220222
) -> ArrayLike:
221223
"""
222224
reverse of _ensure_data
@@ -244,31 +246,28 @@ def _reconstruct_data(
244246

245247
values = cls._from_sequence(values)
246248
elif is_bool_dtype(dtype):
247-
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has
248-
# incompatible type "Union[dtype, ExtensionDtype]"; expected
249-
# "Union[dtype, None, type, _SupportsDtype, str, Tuple[Any, int],
250-
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
251-
# Tuple[Any, Any]]"
249+
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible
250+
# type "Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float],
251+
# Type[int], Type[complex], Type[bool], Type[object]]"; expected
252+
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
253+
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
254+
# Tuple[Any, Any]]]"
252255
values = values.astype(dtype, copy=False) # type: ignore[arg-type]
253256

254257
# a bool Index is only supported with object dtype
255258
if isinstance(original, ABCIndex):
256259
values = values.astype(object, copy=False)
257260
elif dtype is not None:
258261
if is_datetime64_dtype(dtype):
259-
# error: Incompatible types in assignment (expression has type
260-
# "str", variable has type "Union[dtype, ExtensionDtype]")
261-
dtype = "datetime64[ns]" # type: ignore[assignment]
262+
dtype = "datetime64[ns]"
262263
elif is_timedelta64_dtype(dtype):
263-
# error: Incompatible types in assignment (expression has type
264-
# "str", variable has type "Union[dtype, ExtensionDtype]")
265-
dtype = "timedelta64[ns]" # type: ignore[assignment]
266-
267-
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has
268-
# incompatible type "Union[dtype, ExtensionDtype]"; expected
269-
# "Union[dtype, None, type, _SupportsDtype, str, Tuple[Any, int],
270-
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
271-
# Tuple[Any, Any]]"
264+
dtype = "timedelta64[ns]"
265+
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible
266+
# type "Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float],
267+
# Type[int], Type[complex], Type[bool], Type[object]]"; expected
268+
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
269+
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
270+
# Tuple[Any, Any]]]"
272271
values = values.astype(dtype, copy=False) # type: ignore[arg-type]
273272

274273
return values
@@ -461,7 +460,7 @@ def unique(values):
461460
unique1d = unique
462461

463462

464-
def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
463+
def isin(comps: AnySequenceLike, values: AnySequenceLike) -> np.ndarray:
465464
"""
466465
Compute the isin boolean array.
467466
@@ -497,9 +496,11 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
497496
comps = _ensure_arraylike(comps)
498497
comps = extract_array(comps, extract_numpy=True)
499498
if is_extension_array_dtype(comps.dtype):
500-
# error: Incompatible return value type (got "Series", expected "ndarray")
501-
# error: Item "ndarray" of "Union[Any, ndarray]" has no attribute "isin"
502-
return comps.isin(values) # type: ignore[return-value,union-attr]
499+
# error: Argument 1 to "isin" of "ExtensionArray" has incompatible type
500+
# "Union[Any, ExtensionArray, ndarray]"; expected "Sequence[Any]"
501+
# error: Item "ndarray" of "Union[Any, ExtensionArray, ndarray]" has no
502+
# attribute "isin"
503+
return comps.isin(values) # type: ignore[arg-type, union-attr]
503504

504505
elif needs_i8_conversion(comps.dtype):
505506
# Dispatch to DatetimeLikeArrayMixin.isin

pandas/core/array_algos/putmask.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def extract_bool_array(mask: ArrayLike) -> np.ndarray:
191191
# We could have BooleanArray, Sparse[bool], ...
192192
# Except for BooleanArray, this is equivalent to just
193193
# np.asarray(mask, dtype=bool)
194-
mask = mask.to_numpy(dtype=bool, na_value=False)
194+
mask = mask.to_numpy(dtype=np.dtype(bool), na_value=False)
195195

196196
mask = np.asarray(mask, dtype=bool)
197197
return mask

pandas/core/arrays/_mixins.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from functools import wraps
44
from typing import (
55
Any,
6+
List,
67
Optional,
78
Sequence,
89
Type,
@@ -260,12 +261,13 @@ def __getitem__(self: NDArrayBackedExtensionArrayT, key: int) -> Any:
260261

261262
@overload
262263
def __getitem__(
263-
self: NDArrayBackedExtensionArrayT, key: Union[slice, np.ndarray]
264+
self: NDArrayBackedExtensionArrayT, key: Union[slice, np.ndarray, List[Any]]
264265
) -> NDArrayBackedExtensionArrayT:
265266
...
266267

267268
def __getitem__(
268-
self: NDArrayBackedExtensionArrayT, key: Union[int, slice, np.ndarray]
269+
self: NDArrayBackedExtensionArrayT,
270+
key: Union[int, slice, np.ndarray, List[Any]],
269271
) -> Union[NDArrayBackedExtensionArrayT, Any]:
270272
if lib.is_integer(key):
271273
# fast-path

pandas/core/arrays/base.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Callable,
1616
Dict,
1717
Iterator,
18+
List,
1819
Literal,
1920
Optional,
2021
Sequence,
@@ -32,6 +33,7 @@
3233
from pandas._typing import (
3334
ArrayLike,
3435
Dtype,
36+
NpDtype,
3537
Shape,
3638
)
3739
from pandas.compat import set_function_name
@@ -301,11 +303,11 @@ def __getitem__(self, item: int) -> Any:
301303
...
302304

303305
@overload
304-
def __getitem__(self, item: Union[slice, np.ndarray]) -> ExtensionArray:
306+
def __getitem__(self, item: Union[slice, np.ndarray, List[Any]]) -> ExtensionArray:
305307
...
306308

307309
def __getitem__(
308-
self, item: Union[int, slice, np.ndarray]
310+
self, item: Union[int, slice, np.ndarray, List[Any]]
309311
) -> Union[ExtensionArray, Any]:
310312
"""
311313
Select a subset of self.
@@ -441,9 +443,10 @@ def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override]
441443

442444
def to_numpy(
443445
self,
444-
dtype: Optional[Dtype] = None,
446+
dtype: Optional[NpDtype] = None,
445447
copy: bool = False,
446448
na_value: Optional[Any] = lib.no_default,
449+
**kwargs: Any,
447450
) -> np.ndarray:
448451
"""
449452
Convert to a NumPy ndarray.
@@ -470,12 +473,7 @@ def to_numpy(
470473
-------
471474
numpy.ndarray
472475
"""
473-
# error: Argument "dtype" to "asarray" has incompatible type
474-
# "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float], Type[int],
475-
# Type[complex], Type[bool], Type[object], None]"; expected "Union[dtype[Any],
476-
# None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
477-
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
478-
result = np.asarray(self, dtype=dtype) # type: ignore[arg-type]
476+
result = np.asarray(self, dtype=dtype)
479477
if copy or na_value is not lib.no_default:
480478
result = result.copy()
481479
if na_value is not lib.no_default:
@@ -527,8 +525,15 @@ def nbytes(self) -> int:
527525
# ------------------------------------------------------------------------
528526
# Additional Methods
529527
# ------------------------------------------------------------------------
528+
@overload
529+
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
530+
...
531+
532+
@overload
533+
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
534+
...
530535

531-
def astype(self, dtype: Dtype, copy: bool = True) -> np.ndarray:
536+
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
532537
"""
533538
Cast to a NumPy array or ExtensionArray with 'dtype'.
534539
@@ -562,7 +567,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> np.ndarray:
562567
): # allow conversion to StringArrays
563568
return dtype.construct_array_type()._from_sequence(self, copy=False)
564569

565-
return np.array(self, dtype=dtype, copy=copy)
570+
return np.array(self, dtype=cast(NpDtype, dtype), copy=copy)
566571

567572
def isna(self) -> Union[np.ndarray, ExtensionArraySupportsAnyAll]:
568573
"""
@@ -829,9 +834,9 @@ def unique(self) -> ExtensionArray:
829834

830835
def searchsorted(
831836
self,
832-
value: ArrayLike,
837+
value: Sequence[Any],
833838
side: Literal["left", "right"] = "left",
834-
sorter: Optional[ArrayLike] = None,
839+
sorter: Optional[Sequence[Any]] = None,
835840
) -> np.ndarray:
836841
"""
837842
Find indices where elements should be inserted to maintain order.
@@ -877,7 +882,7 @@ def searchsorted(
877882
# 1. Values outside the range of the `data_for_sorting` fixture
878883
# 2. Values between the values in the `data_for_sorting` fixture
879884
# 3. Missing values.
880-
arr = self.astype(object)
885+
arr = cast(np.ndarray, self.astype(object))
881886
return arr.searchsorted(value, side=side, sorter=sorter)
882887

883888
def equals(self, other: object) -> bool:
@@ -914,7 +919,7 @@ def equals(self, other: object) -> bool:
914919
equal_na = self.isna() & other.isna() # type: ignore[operator]
915920
return bool((equal_values | equal_na).all())
916921

917-
def isin(self, values: Union[ExtensionArray, Sequence[Any]]) -> np.ndarray:
922+
def isin(self, values: Sequence[Any]) -> np.ndarray:
918923
"""
919924
Pointwise comparison for set containment in the given values.
920925
@@ -928,7 +933,7 @@ def isin(self, values: Union[ExtensionArray, Sequence[Any]]) -> np.ndarray:
928933
-------
929934
np.ndarray[bool]
930935
"""
931-
return isin(np.asarray(self), values)
936+
return isin(self.astype(object), values)
932937

933938
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
934939
"""
@@ -952,7 +957,7 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
952957
The values returned by this method are also used in
953958
:func:`pandas.util.hash_pandas_object`.
954959
"""
955-
return self.astype(object), np.nan
960+
return cast(np.ndarray, self.astype(object)), np.nan
956961

957962
def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]:
958963
"""

pandas/core/arrays/boolean.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
Tuple,
99
Type,
1010
Union,
11+
overload,
1112
)
1213
import warnings
1314

@@ -20,6 +21,7 @@
2021
from pandas._typing import (
2122
ArrayLike,
2223
Dtype,
24+
DtypeArg,
2325
)
2426
from pandas.compat.numpy import function as nv
2527

@@ -296,7 +298,7 @@ def dtype(self) -> BooleanDtype:
296298

297299
@classmethod
298300
def _from_sequence(
299-
cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False
301+
cls, scalars, *, dtype: Optional[DtypeArg] = None, copy: bool = False
300302
) -> BooleanArray:
301303
if dtype:
302304
assert dtype == "boolean"
@@ -379,7 +381,15 @@ def reconstruct(x):
379381
def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
380382
return coerce_to_array(value)
381383

382-
def astype(self, dtype, copy: bool = True) -> ArrayLike:
384+
@overload
385+
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
386+
...
387+
388+
@overload
389+
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
390+
...
391+
392+
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
383393
"""
384394
Cast to a NumPy array or ExtensionArray with 'dtype'.
385395

pandas/core/arrays/categorical.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
TypeVar,
1616
Union,
1717
cast,
18+
overload,
1819
)
1920
from warnings import warn
2021

@@ -479,6 +480,14 @@ def _constructor(self) -> Type[Categorical]:
479480
def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False):
480481
return Categorical(scalars, dtype=dtype, copy=copy)
481482

483+
@overload
484+
def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray:
485+
...
486+
487+
@overload
488+
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
489+
...
490+
482491
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
483492
"""
484493
Coerce this type to another dtype
@@ -2454,11 +2463,7 @@ def _str_get_dummies(self, sep="|"):
24542463
# sep may not be in categories. Just bail on this.
24552464
from pandas.core.arrays import PandasArray
24562465

2457-
# error: Argument 1 to "PandasArray" has incompatible type
2458-
# "ExtensionArray"; expected "Union[ndarray, PandasArray]"
2459-
return PandasArray(self.astype(str))._str_get_dummies( # type: ignore[arg-type]
2460-
sep
2461-
)
2466+
return PandasArray(self.astype(str))._str_get_dummies(sep)
24622467

24632468

24642469
# The Series.cat accessor

0 commit comments

Comments
 (0)