
Commit 65b7536

Merge pull request #141 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents: df14a3e + 4007513

10 files changed (+75 additions, −96 deletions)


pandas/core/algorithms.py

Lines changed: 10 additions & 30 deletions
@@ -1634,10 +1634,10 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
 
     Parameters
     ----------
-    arr : ndarray
+    arr : ndarray or ExtensionArray
     n : int
         number of periods
-    axis : int
+    axis : {0, 1}
         axis to shift on
     stacklevel : int
         The stacklevel for the lost dtype warning.
@@ -1651,7 +1651,8 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
     na = np.nan
     dtype = arr.dtype
 
-    if dtype.kind == "b":
+    is_bool = is_bool_dtype(dtype)
+    if is_bool:
         op = operator.xor
     else:
         op = operator.sub
@@ -1677,17 +1678,15 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
     dtype = arr.dtype
 
     is_timedelta = False
-    is_bool = False
     if needs_i8_conversion(arr.dtype):
         dtype = np.int64
         arr = arr.view("i8")
         na = iNaT
         is_timedelta = True
 
-    elif is_bool_dtype(dtype):
+    elif is_bool:
         # We have to cast in order to be able to hold np.nan
         dtype = np.object_
-        is_bool = True
 
     elif is_integer_dtype(dtype):
         # We have to cast in order to be able to hold np.nan
@@ -1708,45 +1707,26 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
     dtype = np.dtype(dtype)
     out_arr = np.empty(arr.shape, dtype=dtype)
 
-    na_indexer = [slice(None)] * arr.ndim
+    na_indexer = [slice(None)] * 2
     na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None)
     out_arr[tuple(na_indexer)] = na
 
-    if arr.ndim == 2 and arr.dtype.name in _diff_special:
+    if arr.dtype.name in _diff_special:
         # TODO: can diff_2d dtype specialization troubles be fixed by defining
         # out_arr inside diff_2d?
         algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
     else:
         # To keep mypy happy, _res_indexer is a list while res_indexer is
         # a tuple, ditto for lag_indexer.
-        _res_indexer = [slice(None)] * arr.ndim
+        _res_indexer = [slice(None)] * 2
         _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
         res_indexer = tuple(_res_indexer)
 
-        _lag_indexer = [slice(None)] * arr.ndim
+        _lag_indexer = [slice(None)] * 2
         _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
         lag_indexer = tuple(_lag_indexer)
 
-        # need to make sure that we account for na for datelike/timedelta
-        # we don't actually want to subtract these i8 numbers
-        if is_timedelta:
-            res = arr[res_indexer]
-            lag = arr[lag_indexer]
-
-            mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na)
-            if mask.any():
-                res = res.copy()
-                res[mask] = 0
-                lag = lag.copy()
-                lag[mask] = 0
-
-            result = res - lag
-            result[mask] = na
-            out_arr[res_indexer] = result
-        elif is_bool:
-            out_arr[res_indexer] = arr[res_indexer] ^ arr[lag_indexer]
-        else:
-            out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
+        out_arr[res_indexer] = op(arr[res_indexer], arr[lag_indexer])
 
     if is_timedelta:
         out_arr = out_arr.view("timedelta64[ns]")
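
The refactor above collapses three dtype-specific branches into a single call through `op`, chosen up front: XOR for boolean data, subtraction for everything else (this is why a boolean Series.diff() XORs consecutive values into an object-dtype result). A minimal standalone sketch of that dispatch, assuming plain 1-D NumPy input; `toy_diff` and its object-dtype output are illustrative, not the pandas implementation:

    import operator

    import numpy as np

    def toy_diff(arr: np.ndarray, n: int = 1) -> np.ndarray:
        # choose the binary op the same way the patched diff() does
        op = operator.xor if arr.dtype.kind == "b" else operator.sub
        out = np.empty(arr.shape, dtype=object)  # object dtype can hold the leading NaN
        out[:n] = np.nan
        out[n:] = op(arr[n:], arr[:-n])
        return out

    toy_diff(np.array([True, False, True]))  # [nan, True, True]
    toy_diff(np.array([1.0, 3.0, 6.0]))      # [nan, 2.0, 3.0]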

pandas/core/groupby/groupby.py

Lines changed: 18 additions & 23 deletions
@@ -46,6 +46,7 @@ class providing the base-class of operations.
 )
 import pandas._libs.groupby as libgroupby
 from pandas._typing import (
+    ArrayLike,
     F,
     FrameOrSeries,
     FrameOrSeriesUnion,
@@ -68,7 +69,6 @@ class providing the base-class of operations.
     ensure_float,
     is_bool_dtype,
     is_datetime64_dtype,
-    is_extension_array_dtype,
     is_integer_dtype,
     is_numeric_dtype,
     is_object_dtype,
@@ -85,6 +85,7 @@ class providing the base-class of operations.
 from pandas.core.arrays import (
     Categorical,
     DatetimeArray,
+    ExtensionArray,
 )
 from pandas.core.base import (
     DataError,
@@ -2265,37 +2266,31 @@ def quantile(self, q=0.5, interpolation: str = "linear"):
         """
         from pandas import concat
 
-        def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
+        def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]:
             if is_object_dtype(vals):
                 raise TypeError(
                     "'quantile' cannot be performed against 'object' dtypes!"
                 )
 
-            inference = None
+            inference: Optional[np.dtype] = None
             if is_integer_dtype(vals.dtype):
-                if is_extension_array_dtype(vals.dtype):
-                    # error: "ndarray" has no attribute "to_numpy"
-                    vals = vals.to_numpy(  # type: ignore[attr-defined]
-                        dtype=float, na_value=np.nan
-                    )
-                inference = np.int64
-            elif is_bool_dtype(vals.dtype) and is_extension_array_dtype(vals.dtype):
-                # error: "ndarray" has no attribute "to_numpy"
-                vals = vals.to_numpy(  # type: ignore[attr-defined]
-                    dtype=float, na_value=np.nan
-                )
+                if isinstance(vals, ExtensionArray):
+                    out = vals.to_numpy(dtype=float, na_value=np.nan)
+                else:
+                    out = vals
+                inference = np.dtype(np.int64)
+            elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray):
+                out = vals.to_numpy(dtype=float, na_value=np.nan)
             elif is_datetime64_dtype(vals.dtype):
-                # error: Incompatible types in assignment (expression has type
-                # "str", variable has type "Optional[Type[int64]]")
-                inference = "datetime64[ns]"  # type: ignore[assignment]
-                vals = np.asarray(vals).astype(float)
+                inference = np.dtype("datetime64[ns]")
+                out = np.asarray(vals).astype(float)
             elif is_timedelta64_dtype(vals.dtype):
-                # error: Incompatible types in assignment (expression has type "str",
-                # variable has type "Optional[Type[signedinteger[Any]]]")
-                inference = "timedelta64[ns]"  # type: ignore[assignment]
-                vals = np.asarray(vals).astype(float)
+                inference = np.dtype("timedelta64[ns]")
+                out = np.asarray(vals).astype(float)
+            else:
+                out = np.asarray(vals)
 
-            return vals, inference
+            return out, inference
 
         def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
             if inference:
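
The new pre_processor path boils down to: if the grouped values are an ExtensionArray (e.g. a nullable integer column), convert them to a float ndarray with NaN for missing entries and remember the dtype to restore afterwards. A small sketch of that conversion using public pandas API; the variable names mirror the hunk but are illustrative:

    import numpy as np
    import pandas as pd

    vals = pd.array([1, 2, None], dtype="Int64")           # nullable ExtensionArray
    if isinstance(vals, pd.api.extensions.ExtensionArray):
        out = vals.to_numpy(dtype=float, na_value=np.nan)  # array([ 1.,  2., nan])
        inference = np.dtype(np.int64)                     # remembered for post-processing
    else:
        out = np.asarray(vals)
        inference = None

The isinstance check is also what removes the earlier # type: ignore[attr-defined]: it narrows vals to ExtensionArray, where .to_numpy(dtype=..., na_value=...) is known to exist.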

pandas/core/groupby/ops.py

Lines changed: 8 additions & 15 deletions
@@ -65,6 +65,7 @@
     is_timedelta64_dtype,
     needs_i8_conversion,
 )
+from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.generic import ABCCategoricalIndex
 from pandas.core.dtypes.missing import (
     isna,
@@ -522,7 +523,7 @@ def _disallow_invalid_ops(self, values: ArrayLike, how: str):
     @final
     def _ea_wrap_cython_operation(
         self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs
-    ) -> Tuple[np.ndarray, Optional[List[str]]]:
+    ) -> np.ndarray:
         """
         If we have an ExtensionArray, unwrap, call _cython_operation, and
         re-wrap if appropriate.
@@ -539,10 +540,7 @@ def _ea_wrap_cython_operation(
             )
             if how in ["rank"]:
                 # preserve float64 dtype
-
-                # error: Incompatible return value type (got "ndarray", expected
-                # "Tuple[ndarray, Optional[List[str]]]")
-                return res_values  # type: ignore[return-value]
+                return res_values
 
             res_values = res_values.astype("i8", copy=False)
             result = type(orig_values)(res_values, dtype=orig_values.dtype)
@@ -555,14 +553,11 @@ def _ea_wrap_cython_operation(
                 kind, values, how, axis, min_count, **kwargs
             )
             dtype = maybe_cast_result_dtype(orig_values.dtype, how)
-            if is_extension_array_dtype(dtype):
-                # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
-                # attribute "construct_array_type"
-                cls = dtype.construct_array_type()  # type: ignore[union-attr]
+            if isinstance(dtype, ExtensionDtype):
+                cls = dtype.construct_array_type()
                 return cls._from_sequence(res_values, dtype=dtype)
-            # error: Incompatible return value type (got "ndarray", expected
-            # "Tuple[ndarray, Optional[List[str]]]")
-            return res_values  # type: ignore[return-value]
+
+            return res_values
 
         elif is_float_dtype(values.dtype):
             # FloatingArray
@@ -599,9 +594,7 @@ def _cython_operation(
         self._disallow_invalid_ops(values, how)
 
         if is_extension_array_dtype(values.dtype):
-            # error: Incompatible return value type (got "Tuple[ndarray,
-            # Optional[List[str]]]", expected "ndarray")
-            return self._ea_wrap_cython_operation(  # type: ignore[return-value]
+            return self._ea_wrap_cython_operation(
                 kind, values, how, axis, min_count, **kwargs
             )
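
Same idea as in groupby.py: `isinstance(dtype, ExtensionDtype)` narrows the `Union[np.dtype, ExtensionDtype]` returned by `maybe_cast_result_dtype`, so `construct_array_type()` no longer needs a `type: ignore`. A sketch of the re-wrap step under that assumption; `res_values` is a stand-in for the raw cython result, and `_from_sequence` is the private constructor the hunk itself calls:

    import numpy as np
    import pandas as pd
    from pandas.api.extensions import ExtensionDtype

    res_values = np.array([1, 2, 3], dtype="int64")  # illustrative cython output
    dtype = pd.Int64Dtype()                          # target dtype for the result

    if isinstance(dtype, ExtensionDtype):            # mypy narrows the union here
        cls = dtype.construct_array_type()           # e.g. IntegerArray
        result = cls._from_sequence(res_values, dtype=dtype)
    else:
        result = res_values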

pandas/core/indexes/base.py

Lines changed: 8 additions & 1 deletion
@@ -3876,7 +3876,14 @@ def _reindex_non_unique(self, target):
     # --------------------------------------------------------------------
     # Join Methods
 
-    def join(self, other, how="left", level=None, return_indexers=False, sort=False):
+    def join(
+        self,
+        other,
+        how: str_t = "left",
+        level=None,
+        return_indexers: bool = False,
+        sort: bool = False,
+    ):
         """
         Compute join_index and indexers to conform data
         structures to the new index.
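
Only the signature changes here (one parameter per line plus type annotations); behaviour is untouched. For reference, a typical call to the public method, with exact return types left unstated since they depend on the inputs:

    import pandas as pd

    left = pd.Index([1, 2, 3, 4])
    right = pd.Index([3, 4, 5])

    # with return_indexers=True the joined index comes back together with
    # positional indexers into `left` and `right` (or None when not needed)
    joined, lidx, ridx = left.join(right, how="inner", return_indexers=True)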

pandas/core/indexes/datetimelike.py

Lines changed: 6 additions & 1 deletion
@@ -827,7 +827,12 @@ def _union(self, other, sort):
     _join_precedence = 10
 
     def join(
-        self, other, how: str = "left", level=None, return_indexers=False, sort=False
+        self,
+        other,
+        how: str = "left",
+        level=None,
+        return_indexers: bool = False,
+        sort: bool = False,
     ):
         """
         See Index.join

pandas/core/reshape/reshape.py

Lines changed: 14 additions & 5 deletions
@@ -2,9 +2,11 @@
 
 import itertools
 from typing import (
+    TYPE_CHECKING,
     List,
     Optional,
     Union,
+    cast,
 )
 
 import numpy as np
@@ -44,6 +46,9 @@
     get_group_index_sorter,
 )
 
+if TYPE_CHECKING:
+    from pandas.core.arrays import ExtensionArray
+
 
 class _Unstacker:
     """
@@ -942,11 +947,11 @@ def _get_dummies_1d(
     data,
     prefix,
     prefix_sep="_",
-    dummy_na=False,
-    sparse=False,
-    drop_first=False,
+    dummy_na: bool = False,
+    sparse: bool = False,
+    drop_first: bool = False,
     dtype: Optional[Dtype] = None,
-):
+) -> DataFrame:
     from pandas.core.reshape.concat import concat
 
     # Series avoids inconsistent NaN handling
@@ -1029,6 +1034,8 @@ def get_empty_frame(data) -> DataFrame:
         sparse_series.append(Series(data=sarr, index=index, name=col))
 
         out = concat(sparse_series, axis=1, copy=False)
+        # TODO: overload concat with Literal for axis
+        out = cast(DataFrame, out)
         return out
 
     else:
@@ -1045,7 +1052,9 @@ def get_empty_frame(data) -> DataFrame:
         return DataFrame(dummy_mat, index=index, columns=dummy_cols)
 
 
-def _reorder_for_extension_array_stack(arr, n_rows: int, n_columns: int):
+def _reorder_for_extension_array_stack(
+    arr: ExtensionArray, n_rows: int, n_columns: int
+) -> ExtensionArray:
     """
     Re-orders the values when stacking multiple extension-arrays.
 
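
The added `cast(DataFrame, out)` is purely for the type checker: `concat` is typed as returning either a DataFrame or a Series, and `typing.cast` narrows that with no runtime effect. A minimal illustration of the same pattern, assuming nothing beyond public pandas API:

    from typing import cast

    import pandas as pd

    pieces = [pd.Series([1, 0], name="a"), pd.Series([0, 1], name="b")]
    out = pd.concat(pieces, axis=1, copy=False)  # stubs say DataFrame | Series
    out = cast(pd.DataFrame, out)                # no-op at runtime, narrows for mypy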

pandas/core/sorting.py

Lines changed: 1 addition & 2 deletions
@@ -43,7 +43,6 @@
 _INT64_MAX = np.iinfo(np.int64).max
 
 
-# error: Function "numpy.array" is not valid as a type
 def get_indexer_indexer(
     target: Index,
     level: Union[str, int, List[str], List[int]],
@@ -52,7 +51,7 @@ def get_indexer_indexer(
     na_position: str,
     sort_remaining: bool,
     key: IndexKeyFunc,
-) -> Optional[np.array]:  # type: ignore[valid-type]
+) -> Optional[np.ndarray]:
     """
     Helper method that return the indexer according to input parameters for
     the sort_index method of DataFrame and Series.
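
The fix here is that `np.array` is a factory function, not a class, so it is not a valid annotation; `np.ndarray` is, which removes both the explanatory comment and the `type: ignore[valid-type]`. A tiny illustration (the function name is made up):

    from typing import Optional

    import numpy as np

    def maybe_indexer(flag: bool) -> Optional[np.ndarray]:
        # np.ndarray is a real class and a valid return annotation;
        # annotating with np.array would be rejected by mypy.
        return np.arange(3) if flag else None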

pandas/core/tools/datetimes.py

Lines changed: 5 additions & 11 deletions
@@ -534,25 +534,19 @@ def _to_datetime_with_unit(arg, unit, name, tz, errors: Optional[str]) -> Index:
     # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime
     # because it expects an ndarray argument
     if isinstance(arg, IntegerArray):
-        result = arg.astype(f"datetime64[{unit}]")
+        arr = arg.astype(f"datetime64[{unit}]")
         tz_parsed = None
     else:
-        result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
+        arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
 
     if errors == "ignore":
         # Index constructor _may_ infer to DatetimeIndex
-
-        # error: Incompatible types in assignment (expression has type "Index", variable
-        # has type "ExtensionArray")
-        result = Index(result, name=name)  # type: ignore[assignment]
+        result = Index(arr, name=name)
     else:
-        # error: Incompatible types in assignment (expression has type "DatetimeIndex",
-        # variable has type "ExtensionArray")
-        result = DatetimeIndex(result, name=name)  # type: ignore[assignment]
+        result = DatetimeIndex(arr, name=name)
 
     if not isinstance(result, DatetimeIndex):
-        # error: Incompatible return value type (got "ExtensionArray", expected "Index")
-        return result  # type: ignore[return-value]
+        return result
 
     # GH#23758: We may still need to localize the result with tz
     # GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
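
Renaming the intermediate to `arr` lets `result` keep a single type (Index or DatetimeIndex), which is what removes the three `type: ignore` comments. The code path itself is the familiar epoch-with-unit conversion; the commented output below is the expected shape, not captured from this commit:

    import pandas as pd

    # epoch seconds; 1_577_836_800 s is 2020-01-01 00:00:00 UTC
    pd.to_datetime([1_577_836_800, 1_577_923_200], unit="s")
    # DatetimeIndex(['2020-01-01', '2020-01-02'], dtype='datetime64[ns]', freq=None)

    # a nullable IntegerArray input exercises the isinstance(arg, IntegerArray) branch
    pd.to_datetime(pd.array([1_577_836_800, 1_577_923_200], dtype="Int64"), unit="s")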

pandas/core/tools/numeric.py

Lines changed: 3 additions & 4 deletions
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 
 from pandas._libs import lib
@@ -164,13 +166,10 @@ def to_numeric(arg, errors="raise", downcast=None):
 
     # GH33013: for IntegerArray & FloatingArray extract non-null values for casting
     # save mask to reconstruct the full array after casting
+    mask: Optional[np.ndarray] = None
    if isinstance(values, NumericArray):
         mask = values._mask
         values = values._data[~mask]
-    else:
-        # error: Incompatible types in assignment (expression has type "None", variable
-        # has type "ndarray")
-        mask = None  # type: ignore[assignment]
 
     values_dtype = getattr(values, "dtype", None)
     if is_numeric_dtype(values_dtype):
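
Declaring `mask: Optional[np.ndarray] = None` before the branch replaces the `else` that existed only to assign `None`, and drops the `type: ignore[assignment]`. A sketch of the pattern; `_mask` and `_data` are the private IntegerArray attributes the hunk relies on, and `data` is an illustrative name:

    from typing import Optional

    import numpy as np
    import pandas as pd

    values = pd.array([1, 2, None], dtype="Int64")

    mask: Optional[np.ndarray] = None        # declared up front, so mypy sees Optional
    if isinstance(values, pd.arrays.IntegerArray):
        mask = values._mask                  # boolean ndarray marking missing slots
        data = values._data[~mask]           # non-null values as a plain ndarray
    else:
        data = np.asarray(values)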
