
Commit af4262a

Merge remote-tracking branch 'upstream/master' into ci/min_build

2 parents 0295f69 + 9512393

File tree

30 files changed: +643 additions, -469 deletions

.github/workflows/code-checks.yml

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@ jobs:
 
       - name: Install pyright
         # note: keep version in sync with .pre-commit-config.yaml
-        run: npm install -g pyright@1.1.200
+        run: npm install -g pyright@1.1.202
 
       - name: Build Pandas
        id: build

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ repos:
         types: [python]
         stages: [manual]
         # note: keep version in sync with .github/workflows/ci.yml
-        additional_dependencies: ['pyright@1.1.200']
+        additional_dependencies: ['pyright@1.1.202']
 -   repo: local
     hooks:
     -   id: flake8-rst

doc/source/whatsnew/v1.4.0.rst

Lines changed: 2 additions & 0 deletions
@@ -856,6 +856,7 @@ I/O
 - Bug in :func:`to_csv` always coercing datetime columns with different formats to the same format (:issue:`21734`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 - Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`)
+- Bug in :func:`to_xml` raising error for ``pd.NA`` with extension array dtype (:issue:`43903`)
 - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
 - Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`)
 - Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
@@ -898,6 +899,7 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contains duplicates (:issue:`3944`)
 - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`)
+- Bug in :meth:`GroupBy.head` and :meth:`GroupBy.tail` not dropping groups with ``NaN`` when ``dropna=True`` (:issue:`45089`)
 - Fixed bug in :meth:`GroupBy.__iter__` after selecting a subset of columns in a :class:`GroupBy` object, which returned all columns instead of the chosen subset (:issue:`44821`)
 - Bug in :meth:`Groupby.rolling` when non-monotonic data is passed, failing to correctly raise ``ValueError`` (:issue:`43909`)
 - Fixed bug where grouping by a :class:`Series` that has a categorical data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`)
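
A hedged illustration of the first entry added above (to_xml with pd.NA, GH 43903): the frame contents and the parser="etree" choice are assumptions made for this example, not taken from the commit.

    import pandas as pd

    # A nullable-integer column holding pd.NA (illustrative data).
    df = pd.DataFrame({"a": pd.array([1, pd.NA], dtype="Int64")})

    # With the fix described above this should no longer raise;
    # parser="etree" sidesteps the optional lxml dependency.
    print(df.to_xml(parser="etree"))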

pandas/_libs/reshape.pyx

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ def explode(ndarray[object] values):
 
     Parameters
     ----------
-    values : object ndarray
+    values : ndarray[object]
 
     Returns
     -------

pandas/_typing.py

Lines changed: 1 addition & 0 deletions
@@ -84,6 +84,7 @@
 DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
 PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
 Scalar = Union[PythonScalar, PandasScalar]
+IntStrT = TypeVar("IntStrT", int, str)
 
 
 # timestamp and timedelta convertible types
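
IntStrT is a value-constrained TypeVar: a type checker must bind it to exactly int or exactly str at each call site, so an annotated return type mirrors the key type that was passed in. A minimal sketch of that behaviour; echo_key is a hypothetical helper used only for illustration, not a pandas function.

    from typing import TypeVar

    IntStrT = TypeVar("IntStrT", int, str)

    def echo_key(key: IntStrT) -> IntStrT:
        # The checker solves IntStrT per call, so the return type matches
        # whichever constraint (int or str) the caller supplied.
        return key

    position: int = echo_key(0)      # IntStrT bound to int
    name: str = echo_key("Sheet1")   # IntStrT bound to str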

pandas/core/algorithms.py

Lines changed: 3 additions & 3 deletions
@@ -298,7 +298,7 @@ def _get_values_for_rank(values: ArrayLike) -> np.ndarray:
     return values
 
 
-def get_data_algo(values: ArrayLike):
+def _get_data_algo(values: ArrayLike):
     values = _get_values_for_rank(values)
 
     ndtype = _check_object_for_strings(values)
@@ -555,7 +555,7 @@ def factorize_array(
     codes : ndarray[np.intp]
     uniques : ndarray
     """
-    hash_klass, values = get_data_algo(values)
+    hash_klass, values = _get_data_algo(values)
 
     table = hash_klass(size_hint or len(values))
     uniques, codes = table.factorize(
@@ -1747,7 +1747,7 @@ def safe_sort(
 
     if sorter is None:
         # mixed types
-        hash_klass, values = get_data_algo(values)
+        hash_klass, values = _get_data_algo(values)
         t = hash_klass(len(values))
         t.map_locations(values)
         sorter = ensure_platform_int(t.lookup(ordered))

pandas/core/arrays/categorical.py

Lines changed: 15 additions & 29 deletions
@@ -55,7 +55,6 @@
 )
 from pandas.core.dtypes.common import (
     ensure_int64,
-    ensure_object,
     ensure_platform_int,
     is_categorical_dtype,
     is_datetime64_dtype,
@@ -85,15 +84,17 @@
     notna,
 )
 
-from pandas.core import ops
+from pandas.core import (
+    arraylike,
+    ops,
+)
 from pandas.core.accessor import (
     PandasDelegate,
     delegate_names,
 )
 import pandas.core.algorithms as algorithms
 from pandas.core.algorithms import (
     factorize,
-    get_data_algo,
     take_nd,
     unique1d,
 )
@@ -1516,6 +1517,14 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         if result is not NotImplemented:
             return result
 
+        if method == "reduce":
+            # e.g. TestCategoricalAnalytics::test_min_max_ordered
+            result = arraylike.dispatch_reduction_ufunc(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            if result is not NotImplemented:
+                return result
+
         # for all other cases, raise for now (similarly as what happens in
         # Series.__array_prepare__)
         raise TypeError(
@@ -2749,8 +2758,6 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
 
     If `values` is known to be a Categorical, use recode_for_categories instead.
     """
-    dtype_equal = is_dtype_equal(values.dtype, categories.dtype)
-
     if values.ndim > 1:
         flat = values.ravel()
         codes = _get_codes_for_values(flat, categories)
@@ -2762,30 +2769,9 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
         # Categorical(array[Period, Period], categories=PeriodIndex(...))
         cls = categories.dtype.construct_array_type()
         values = maybe_cast_to_extension_array(cls, values)
-        if not isinstance(values, cls):
-            # exception raised in _from_sequence
-            values = ensure_object(values)
-            # error: Incompatible types in assignment (expression has type
-            # "ndarray", variable has type "Index")
-            categories = ensure_object(categories)  # type: ignore[assignment]
-    elif not dtype_equal:
-        values = ensure_object(values)
-        # error: Incompatible types in assignment (expression has type "ndarray",
-        # variable has type "Index")
-        categories = ensure_object(categories)  # type: ignore[assignment]
-
-    if isinstance(categories, ABCIndex):
-        return coerce_indexer_dtype(categories.get_indexer_for(values), categories)
-
-    # Only hit here when we've already coerced to object dtypee.
-
-    hash_klass, vals = get_data_algo(values)
-    # pandas/core/arrays/categorical.py:2661: error: Argument 1 to "get_data_algo" has
-    # incompatible type "Index"; expected "Union[ExtensionArray, ndarray]" [arg-type]
-    _, cats = get_data_algo(categories)  # type: ignore[arg-type]
-    t = hash_klass(len(cats))
-    t.map_locations(cats)
-    return coerce_indexer_dtype(t.lookup(vals), cats)
+
+    codes = categories.get_indexer_for(values)
+    return coerce_indexer_dtype(codes, categories)
 
 
 def recode_for_categories(
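
The new "reduce" branch lets NumPy reduction ufuncs fall through to the matching Categorical reduction instead of raising. A minimal sketch of the behaviour this enables, assuming an ordered categorical (unordered categoricals still reject min/max):

    import numpy as np
    import pandas as pd

    cat = pd.Categorical(["b", "a", "c"], categories=["a", "b", "c"], ordered=True)

    # np.minimum.reduce is now dispatched to Categorical.min(), so both
    # calls below should return "a".
    print(np.minimum.reduce(cat))
    print(cat.min())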

pandas/core/arrays/numpy_.py

Lines changed: 0 additions & 4 deletions
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import numbers
-
 import numpy as np
 
 from pandas._libs import lib
@@ -130,8 +128,6 @@ def dtype(self) -> PandasDtype:
     def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
         return np.asarray(self._ndarray, dtype=dtype)
 
-    _HANDLED_TYPES = (np.ndarray, numbers.Number)
-
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         # Lightly modified version of
         # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html

pandas/core/arrays/sparse/array.py

Lines changed: 26 additions & 2 deletions
@@ -73,6 +73,7 @@
     notna,
 )
 
+from pandas.core import arraylike
 import pandas.core.algorithms as algos
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays import ExtensionArray
@@ -1415,7 +1416,9 @@ def any(self, axis=0, *args, **kwargs):
 
         return values.any().item()
 
-    def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
+    def sum(
+        self, axis: int = 0, min_count: int = 0, skipna: bool = True, *args, **kwargs
+    ) -> Scalar:
         """
         Sum of non-NA/null values
 
@@ -1437,6 +1440,11 @@ def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
         nv.validate_sum(args, kwargs)
         valid_vals = self._valid_sp_values
         sp_sum = valid_vals.sum()
+        has_na = self.sp_index.ngaps > 0 and not self._null_fill_value
+
+        if has_na and not skipna:
+            return na_value_for_dtype(self.dtype.subtype, compat=False)
+
         if self._null_fill_value:
             if check_below_min_count(valid_vals.shape, None, min_count):
                 return na_value_for_dtype(self.dtype.subtype, compat=False)
@@ -1589,6 +1597,21 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         if result is not NotImplemented:
             return result
 
+        if "out" in kwargs:
+            # e.g. tests.arrays.sparse.test_arithmetics.test_ndarray_inplace
+            res = arraylike.dispatch_ufunc_with_out(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            return res
+
+        if method == "reduce":
+            result = arraylike.dispatch_reduction_ufunc(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            if result is not NotImplemented:
+                # e.g. tests.series.test_ufunc.TestNumpyReductions
+                return result
+
         if len(inputs) == 1:
             # No alignment necessary.
             sp_values = getattr(ufunc, method)(self.sp_values, **kwargs)
@@ -1611,7 +1634,8 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
                 sp_values, self.sp_index, SparseDtype(sp_values.dtype, fill_value)
             )
 
-        result = getattr(ufunc, method)(*(np.asarray(x) for x in inputs), **kwargs)
+        new_inputs = tuple(np.asarray(x) for x in inputs)
+        result = getattr(ufunc, method)(*new_inputs, **kwargs)
         if out:
             if len(out) == 1:
                 out = out[0]
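
The "out" branch above is what allows in-place NumPy operations against a SparseArray to land in the supplied buffer. A small sketch under assumed example data (mirroring the scenario of the test named in the comment, not copied from it):

    import numpy as np
    import pandas as pd

    buf = np.array([1.0, 2.0, 3.0])
    arr = pd.arrays.SparseArray([0.0, 1.0, 0.0], fill_value=0.0)

    # buf += arr becomes np.add(buf, arr, out=buf), which SparseArray now
    # routes through arraylike.dispatch_ufunc_with_out and writes back
    # into buf.
    buf += arr
    print(buf)  # expected: [1. 3. 3.]

The reduce branch and the new skipna argument to sum are companion pieces: they let reductions such as np.add.reduce be answered by the array's own sum method rather than by the generic element-wise path.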

pandas/core/groupby/groupby.py

Lines changed: 3 additions & 0 deletions
@@ -3580,6 +3580,9 @@ def _mask_selected_obj(self, mask: np.ndarray) -> NDFrameT:
         Series or DataFrame
             Filtered _selected_obj.
         """
+        ids = self.grouper.group_info[0]
+        mask = mask & (ids != -1)
+
         if self.axis == 0:
             return self._selected_obj[mask]
         else:
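
A hedged sketch of what the extra mask buys (GH 45089, matching the whatsnew entry above): rows whose group key is NaN get group id -1, and with dropna=True (the default) they are now excluded from head()/tail() output. The frame below is illustrative.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"key": [1.0, np.nan, 2.0], "val": [10, 20, 30]})

    # With the fix, the NaN-keyed row is dropped from the result.
    print(df.groupby("key", dropna=True).head(1))

    # dropna=False keeps NaN as its own group, so that row is retained.
    print(df.groupby("key", dropna=False).head(1))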

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
@@ -878,6 +878,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
         if result is not NotImplemented:
             return result
 
+        if "out" in kwargs:
+            # e.g. test_dti_isub_tdi
+            return arraylike.dispatch_ufunc_with_out(
+                self, ufunc, method, *inputs, **kwargs
+            )
+
         if method == "reduce":
             result = arraylike.dispatch_reduction_ufunc(
                 self, ufunc, method, *inputs, **kwargs
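
A minimal sketch (with an assumed integer Index rather than the datetime case named in the comment) of the ufunc "out" path now handled here: because Index defines __array_ufunc__, NumPy hands the call to pandas, which routes it through arraylike.dispatch_ufunc_with_out and writes the result into the supplied buffer.

    import numpy as np
    import pandas as pd

    idx = pd.Index([1, 2, 3])
    buf = np.empty(3, dtype=np.int64)

    # The out= keyword sends the call through the new branch; buf should
    # end up holding [2, 3, 4].
    np.add(idx, 1, out=buf)
    print(buf)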
