
Commit af4262a

Merge remote-tracking branch 'upstream/master' into ci/min_build

2 parents 0295f69 + 9512393

File tree

30 files changed: +643 additions, -469 deletions

.github/workflows/code-checks.yml

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@ jobs:
 
       - name: Install pyright
         # note: keep version in sync with .pre-commit-config.yaml
-        run: npm install -g pyright@1.1.200
+        run: npm install -g pyright@1.1.202
 
       - name: Build Pandas
        id: build

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ repos:
         types: [python]
         stages: [manual]
         # note: keep version in sync with .github/workflows/ci.yml
-        additional_dependencies: ['pyright@1.1.200']
+        additional_dependencies: ['pyright@1.1.202']
 -   repo: local
     hooks:
     -   id: flake8-rst

doc/source/whatsnew/v1.4.0.rst

Lines changed: 2 additions & 0 deletions
@@ -856,6 +856,7 @@ I/O
 - Bug in :func:`to_csv` always coercing datetime columns with different formats to the same format (:issue:`21734`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 - Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`)
+- Bug in :func:`to_xml` raising error for ``pd.NA`` with extension array dtype (:issue:`43903`)
 - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
 - Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`)
 - Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
@@ -898,6 +899,7 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contains duplicates (:issue:`3944`)
 - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`)
+- Bug in :meth:`GroupBy.head` and :meth:`GroupBy.tail` not dropping groups with ``NaN`` when ``dropna=True`` (:issue:`45089`)
 - Fixed bug in :meth:`GroupBy.__iter__` after selecting a subset of columns in a :class:`GroupBy` object, which returned all columns instead of the chosen subset (:issue:`44821`)
 - Bug in :meth:`Groupby.rolling` when non-monotonic data is passed, failing to correctly raise ``ValueError`` (:issue:`43909`)
 - Fixed bug where grouping by a :class:`Series` that has a categorical data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`)
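
A hedged illustration of the first entry added above (to_xml with pd.NA, GH 43903): the frame contents and the parser="etree" choice are assumptions made for this example, not taken from the commit.

    import pandas as pd

    # A nullable-integer column holding pd.NA (illustrative data).
    df = pd.DataFrame({"a": pd.array([1, pd.NA], dtype="Int64")})

    # With the fix described above this should no longer raise;
    # parser="etree" sidesteps the optional lxml dependency.
    print(df.to_xml(parser="etree"))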

pandas/_libs/reshape.pyx

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ def explode(ndarray[object] values):
 
     Parameters
     ----------
-    values : object ndarray
+    values : ndarray[object]
 
     Returns
     -------

pandas/_typing.py

Lines changed: 1 addition & 0 deletions
@@ -84,6 +84,7 @@
 DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
 PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
 Scalar = Union[PythonScalar, PandasScalar]
+IntStrT = TypeVar("IntStrT", int, str)
 
 
 # timestamp and timedelta convertible types
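
IntStrT is a value-constrained TypeVar: a type checker must bind it to exactly int or exactly str at each call site, so an annotated return type mirrors the key type that was passed in. A minimal sketch of that behaviour; echo_key is a hypothetical helper used only for illustration, not a pandas function.

    from typing import TypeVar

    IntStrT = TypeVar("IntStrT", int, str)

    def echo_key(key: IntStrT) -> IntStrT:
        # The checker solves IntStrT per call, so the return type matches
        # whichever constraint (int or str) the caller supplied.
        return key

    position: int = echo_key(0)      # IntStrT bound to int
    name: str = echo_key("Sheet1")   # IntStrT bound to str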

pandas/core/algorithms.py

Lines changed: 3 additions & 3 deletions
@@ -298,7 +298,7 @@ def _get_values_for_rank(values: ArrayLike) -> np.ndarray:
     return values
 
 
-def get_data_algo(values: ArrayLike):
+def _get_data_algo(values: ArrayLike):
     values = _get_values_for_rank(values)
 
     ndtype = _check_object_for_strings(values)
@@ -555,7 +555,7 @@ def factorize_array(
     codes : ndarray[np.intp]
     uniques : ndarray
     """
-    hash_klass, values = get_data_algo(values)
+    hash_klass, values = _get_data_algo(values)
 
     table = hash_klass(size_hint or len(values))
     uniques, codes = table.factorize(
@@ -1747,7 +1747,7 @@ def safe_sort(
 
     if sorter is None:
         # mixed types
-        hash_klass, values = get_data_algo(values)
+        hash_klass, values = _get_data_algo(values)
         t = hash_klass(len(values))
         t.map_locations(values)
         sorter = ensure_platform_int(t.lookup(ordered))

pandas/core/arrays/categorical.py

Lines changed: 15 additions & 29 deletions
@@ -55,7 +55,6 @@
 )
 from pandas.core.dtypes.common import (
     ensure_int64,
-    ensure_object,
     ensure_platform_int,
     is_categorical_dtype,
     is_datetime64_dtype,
@@ -85,15 +84,17 @@
     notna,
 )
 
-from pandas.core import ops
+from pandas.core import (
+    arraylike,
+    ops,
+)
 from pandas.core.accessor import (
     PandasDelegate,
     delegate_names,
 )
 import pandas.core.algorithms as algorithms
 from pandas.core.algorithms import (
     factorize,
-    get_data_algo,
     take_nd,
     unique1d,
 )
@@ -1516,6 +1517,14 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         if result is not NotImplemented:
             return result
 
+        if method == "reduce":
+            # e.g. TestCategoricalAnalytics::test_min_max_ordered
+            result = arraylike.dispatch_reduction_ufunc(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            if result is not NotImplemented:
+                return result
+
         # for all other cases, raise for now (similarly as what happens in
         # Series.__array_prepare__)
         raise TypeError(
@@ -2749,8 +2758,6 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
 
     If `values` is known to be a Categorical, use recode_for_categories instead.
     """
-    dtype_equal = is_dtype_equal(values.dtype, categories.dtype)
-
     if values.ndim > 1:
         flat = values.ravel()
         codes = _get_codes_for_values(flat, categories)
@@ -2762,30 +2769,9 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
         # Categorical(array[Period, Period], categories=PeriodIndex(...))
         cls = categories.dtype.construct_array_type()
         values = maybe_cast_to_extension_array(cls, values)
-        if not isinstance(values, cls):
-            # exception raised in _from_sequence
-            values = ensure_object(values)
-            # error: Incompatible types in assignment (expression has type
-            # "ndarray", variable has type "Index")
-            categories = ensure_object(categories)  # type: ignore[assignment]
-    elif not dtype_equal:
-        values = ensure_object(values)
-        # error: Incompatible types in assignment (expression has type "ndarray",
-        # variable has type "Index")
-        categories = ensure_object(categories)  # type: ignore[assignment]
-
-    if isinstance(categories, ABCIndex):
-        return coerce_indexer_dtype(categories.get_indexer_for(values), categories)
-
-    # Only hit here when we've already coerced to object dtypee.
-
-    hash_klass, vals = get_data_algo(values)
-    # pandas/core/arrays/categorical.py:2661: error: Argument 1 to "get_data_algo" has
-    # incompatible type "Index"; expected "Union[ExtensionArray, ndarray]" [arg-type]
-    _, cats = get_data_algo(categories)  # type: ignore[arg-type]
-    t = hash_klass(len(cats))
-    t.map_locations(cats)
-    return coerce_indexer_dtype(t.lookup(vals), cats)
+
+    codes = categories.get_indexer_for(values)
+    return coerce_indexer_dtype(codes, categories)
 
 
 def recode_for_categories(
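
The new "reduce" branch lets NumPy reduction ufuncs fall through to the matching Categorical reduction instead of raising. A minimal sketch of the behaviour this enables, assuming an ordered categorical (unordered categoricals still reject min/max):

    import numpy as np
    import pandas as pd

    cat = pd.Categorical(["b", "a", "c"], categories=["a", "b", "c"], ordered=True)

    # np.minimum.reduce is now dispatched to Categorical.min(), so both
    # calls below should return "a".
    print(np.minimum.reduce(cat))
    print(cat.min())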

pandas/core/arrays/numpy_.py

Lines changed: 0 additions & 4 deletions
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import numbers
-
 import numpy as np
 
 from pandas._libs import lib
@@ -130,8 +128,6 @@ def dtype(self) -> PandasDtype:
     def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
         return np.asarray(self._ndarray, dtype=dtype)
 
-    _HANDLED_TYPES = (np.ndarray, numbers.Number)
-
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         # Lightly modified version of
         # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html

pandas/core/arrays/sparse/array.py

Lines changed: 26 additions & 2 deletions
@@ -73,6 +73,7 @@
     notna,
 )
 
+from pandas.core import arraylike
 import pandas.core.algorithms as algos
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays import ExtensionArray
@@ -1415,7 +1416,9 @@ def any(self, axis=0, *args, **kwargs):
 
         return values.any().item()
 
-    def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
+    def sum(
+        self, axis: int = 0, min_count: int = 0, skipna: bool = True, *args, **kwargs
+    ) -> Scalar:
         """
         Sum of non-NA/null values
 
@@ -1437,6 +1440,11 @@ def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
         nv.validate_sum(args, kwargs)
         valid_vals = self._valid_sp_values
         sp_sum = valid_vals.sum()
+        has_na = self.sp_index.ngaps > 0 and not self._null_fill_value
+
+        if has_na and not skipna:
+            return na_value_for_dtype(self.dtype.subtype, compat=False)
+
         if self._null_fill_value:
             if check_below_min_count(valid_vals.shape, None, min_count):
                 return na_value_for_dtype(self.dtype.subtype, compat=False)
@@ -1589,6 +1597,21 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         if result is not NotImplemented:
             return result
 
+        if "out" in kwargs:
+            # e.g. tests.arrays.sparse.test_arithmetics.test_ndarray_inplace
+            res = arraylike.dispatch_ufunc_with_out(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            return res
+
+        if method == "reduce":
+            result = arraylike.dispatch_reduction_ufunc(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            if result is not NotImplemented:
+                # e.g. tests.series.test_ufunc.TestNumpyReductions
+                return result
+
         if len(inputs) == 1:
             # No alignment necessary.
             sp_values = getattr(ufunc, method)(self.sp_values, **kwargs)
@@ -1611,7 +1634,8 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
                 sp_values, self.sp_index, SparseDtype(sp_values.dtype, fill_value)
             )
 
-        result = getattr(ufunc, method)(*(np.asarray(x) for x in inputs), **kwargs)
+        new_inputs = tuple(np.asarray(x) for x in inputs)
+        result = getattr(ufunc, method)(*new_inputs, **kwargs)
         if out:
             if len(out) == 1:
                 out = out[0]
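
The "out" branch above is what allows in-place NumPy operations against a SparseArray to land in the supplied buffer. A small sketch under assumed example data (mirroring the scenario of the test named in the comment, not copied from it):

    import numpy as np
    import pandas as pd

    buf = np.array([1.0, 2.0, 3.0])
    arr = pd.arrays.SparseArray([0.0, 1.0, 0.0], fill_value=0.0)

    # buf += arr becomes np.add(buf, arr, out=buf), which SparseArray now
    # routes through arraylike.dispatch_ufunc_with_out and writes back
    # into buf.
    buf += arr
    print(buf)  # expected: [1. 3. 3.]

The reduce branch and the new skipna argument to sum are companion pieces: they let reductions such as np.add.reduce be answered by the array's own sum method rather than by the generic element-wise path.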

pandas/core/groupby/groupby.py

Lines changed: 3 additions & 0 deletions
@@ -3580,6 +3580,9 @@ def _mask_selected_obj(self, mask: np.ndarray) -> NDFrameT:
         Series or DataFrame
             Filtered _selected_obj.
         """
+        ids = self.grouper.group_info[0]
+        mask = mask & (ids != -1)
+
         if self.axis == 0:
             return self._selected_obj[mask]
         else:
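
A hedged sketch of what the extra mask buys (GH 45089, matching the whatsnew entry above): rows whose group key is NaN get group id -1, and with dropna=True (the default) they are now excluded from head()/tail() output. The frame below is illustrative.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"key": [1.0, np.nan, 2.0], "val": [10, 20, 30]})

    # With the fix, the NaN-keyed row is dropped from the result.
    print(df.groupby("key", dropna=True).head(1))

    # dropna=False keeps NaN as its own group, so that row is retained.
    print(df.groupby("key", dropna=False).head(1))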

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
@@ -878,6 +878,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
         if result is not NotImplemented:
             return result
 
+        if "out" in kwargs:
+            # e.g. test_dti_isub_tdi
+            return arraylike.dispatch_ufunc_with_out(
+                self, ufunc, method, *inputs, **kwargs
+            )
+
         if method == "reduce":
             result = arraylike.dispatch_reduction_ufunc(
                 self, ufunc, method, *inputs, **kwargs
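
A minimal sketch (with an assumed integer Index rather than the datetime case named in the comment) of the ufunc "out" path now handled here: because Index defines __array_ufunc__, NumPy hands the call to pandas, which routes it through arraylike.dispatch_ufunc_with_out and writes the result into the supplied buffer.

    import numpy as np
    import pandas as pd

    idx = pd.Index([1, 2, 3])
    buf = np.empty(3, dtype=np.int64)

    # The out= keyword sends the call through the new branch; buf should
    # end up holding [2, 3, 4].
    np.add(idx, 1, out=buf)
    print(buf)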
