pandas-dev
diff --git a/‎.pre-commit-config.yaml
Lines changed: 8 additions & 3 deletions b/‎.pre-commit-config.yaml
Lines changed: 8 additions & 3 deletions
diff --git a/‎asv_bench/benchmarks/io/csv.py
Lines changed: 2 additions & 2 deletions b/‎asv_bench/benchmarks/io/csv.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/source/whatsnew/v1.2.2.rst
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/v1.2.2.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/whatsnew/v1.3.0.rst
Lines changed: 10 additions & 0 deletions b/‎doc/source/whatsnew/v1.3.0.rst
Lines changed: 10 additions & 0 deletions
diff --git a/‎pandas/__init__.py
Lines changed: 1 addition & 1 deletion b/‎pandas/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/index_class_helper.pxi.in
Lines changed: 7 additions & 0 deletions b/‎pandas/_libs/index_class_helper.pxi.in
Lines changed: 7 additions & 0 deletions
diff --git a/‎pandas/_libs/tslibs/__init__.py
Lines changed: 22 additions & 12 deletions b/‎pandas/_libs/tslibs/__init__.py
Lines changed: 22 additions & 12 deletions
diff --git a/‎pandas/core/aggregation.py
Lines changed: 2 additions & 216 deletions b/‎pandas/core/aggregation.py
Lines changed: 2 additions & 216 deletions
@@ -24,10 +24,10 @@ repos:
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.7.4
+    rev: v2.9.0
     hooks:
     -   id: pyupgrade
-        args: [--py37-plus]
+        args: [--py37-plus, --keep-runtime-typing]
 -   repo: https://github.com/pre-commit/pygrep-hooks
     rev: v1.7.0
     hooks:
@@ -192,6 +192,11 @@ repos:
         files: ^pandas/
         exclude: ^pandas/tests/
 -   repo: https://github.com/MarcoGorelli/no-string-hints
-    rev: v0.1.6
+    rev: v0.1.7
     hooks:
     -   id: no-string-hints
+-   repo: https://github.com/MarcoGorelli/abs-imports
+    rev: v0.1.2
+    hooks:
+    -   id: abs-imports
+        files: ^pandas/
@@ -84,8 +84,8 @@ class ToCSVIndexes(BaseIO):
     def _create_df(rows, cols):
         index_cols = {
             "index1": np.random.randint(0, rows, rows),
-            "index2": np.full(rows, 1, dtype=np.int),
-            "index3": np.full(rows, 1, dtype=np.int),
+            "index2": np.full(rows, 1, dtype=int),
+            "index3": np.full(rows, 1, dtype=int),
         }
         data_cols = {
             f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)
 
@@ -17,6 +17,7 @@ Fixed regressions
 
 - Fixed regression in :func:`read_excel` that caused it to raise ``AttributeError`` when checking version of older xlrd versions (:issue:`38955`)
 - Fixed regression in :class:`DataFrame` constructor reordering element when construction from datetime ndarray with dtype not ``"datetime64[ns]"`` (:issue:`39422`)
+- Fixed regression in :meth:`DataFrame.astype` and :meth:`Series.astype` not casting to bytes dtype (:issue:`39474`)
 - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
 - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
 - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
 
@@ -338,8 +338,12 @@ Indexing
 - Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)
 - Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)
 - Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrect casting the datetime64 values to integers (:issue:`39266`)
+- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when ``method`` is specified for a ``NaN`` value and ``NaN`` is not in the :class:`Index` (:issue:`39382`)
 - Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`)
 - Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`)
+- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
+- Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
+-
 
 Missing
 ^^^^^^^
@@ -412,7 +416,9 @@ Reshaping
 - :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
 - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
 - :meth:`Series.value_counts` and :meth:`Series.mode` return consistent keys in original order (:issue:`12679`, :issue:`11227` and :issue:`39007`)
+- Bug in :meth:`DataFrame.stack` not handling ``NaN`` in :class:`MultiIndex` columns correctly (:issue:`39481`)
 - Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`)
+- Bug in :meth:`DataFrame.sort_values` not reshaping index correctly after sorting on columns, when ``ignore_index=True`` (:issue:`39464`)
 - Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``ExtensionDtype`` dtypes (:issue:`39454`)
 
 Sparse
@@ -434,7 +440,11 @@ Other
 - Bug in :class:`Index` constructor sometimes silently ignorning a specified ``dtype`` (:issue:`38879`)
 - Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
 - Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`)
+- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)
 - Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
+- Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`)
+-
+-
 
 .. ---------------------------------------------------------------------------
 
 
@@ -180,7 +180,7 @@
 import pandas.arrays
 
 # use the closest tagged version if possible
-from ._version import get_versions
+from pandas._version import get_versions
 
 v = get_versions()
 __version__ = v.get("closest-tag", v["version"])
 
@@ -57,7 +57,14 @@ cdef class {{name}}Engine(IndexEngine):
             with warnings.catch_warnings():
                 # e.g. if values is float64 and `val` is a str, suppress warning
                 warnings.filterwarnings("ignore", category=FutureWarning)
+                {{if name in {'Float64', 'Float32'} }}
+                if util.is_nan(val):
+                    indexer = np.isnan(values)
+                else:
+                    indexer = values == val
+                {{else}}
                 indexer = values == val
+                {{endif}}
         except TypeError:
             # if the equality above returns a bool, cython will raise TypeError
             #  when trying to cast it to ndarray
 
@@ -27,18 +27,28 @@
     "tz_compare",
 ]
 
-from . import dtypes
-from .conversion import OutOfBoundsTimedelta, localize_pydatetime
-from .dtypes import Resolution
-from .nattype import NaT, NaTType, iNaT, is_null_datetimelike, nat_strings
-from .np_datetime import OutOfBoundsDatetime
-from .offsets import BaseOffset, Tick, to_offset
-from .period import IncompatibleFrequency, Period
-from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta
-from .timestamps import Timestamp
-from .timezones import tz_compare
-from .tzconversion import tz_convert_from_utc_single
-from .vectorized import (
+from pandas._libs.tslibs import dtypes
+from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta, localize_pydatetime
+from pandas._libs.tslibs.dtypes import Resolution
+from pandas._libs.tslibs.nattype import (
+    NaT,
+    NaTType,
+    iNaT,
+    is_null_datetimelike,
+    nat_strings,
+)
+from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
+from pandas._libs.tslibs.offsets import BaseOffset, Tick, to_offset
+from pandas._libs.tslibs.period import IncompatibleFrequency, Period
+from pandas._libs.tslibs.timedeltas import (
+    Timedelta,
+    delta_to_nanoseconds,
+    ints_to_pytimedelta,
+)
+from pandas._libs.tslibs.timestamps import Timestamp
+from pandas._libs.tslibs.timezones import tz_compare
+from pandas._libs.tslibs.tzconversion import tz_convert_from_utc_single
+from pandas._libs.tslibs.vectorized import (
     dt64arr_to_periodarr,
     get_resolution,
     ints_to_pydatetime,
 
@@ -27,18 +27,16 @@
     AggFuncType,
     AggFuncTypeBase,
     AggFuncTypeDict,
-    AggObjType,
     Axis,
     FrameOrSeries,
     FrameOrSeriesUnion,
 )
 
-from pandas.core.dtypes.cast import is_nested_object
 from pandas.core.dtypes.common import is_dict_like, is_list_like
-from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries
+from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
 from pandas.core.algorithms import safe_sort
-from pandas.core.base import DataError, SpecificationError
+from pandas.core.base import SpecificationError
 import pandas.core.common as com
 from pandas.core.indexes.api import Index
 
@@ -532,215 +530,3 @@ def transform_str_or_callable(
         return obj.apply(func, args=args, **kwargs)
     except Exception:
         return func(obj, *args, **kwargs)
-
-
-def agg_list_like(
-    obj: AggObjType,
-    arg: List[AggFuncTypeBase],
-    _axis: int,
-) -> FrameOrSeriesUnion:
-    """
-    Compute aggregation in the case of a list-like argument.
-
-    Parameters
-    ----------
-    obj : Pandas object to compute aggregation on.
-    arg : list
-        Aggregations to compute.
-    _axis : int, 0 or 1
-        Axis to compute aggregation on.
-
-    Returns
-    -------
-    Result of aggregation.
-    """
-    from pandas.core.reshape.concat import concat
-
-    if _axis != 0:
-        raise NotImplementedError("axis other than 0 is not supported")
-
-    if obj._selected_obj.ndim == 1:
-        selected_obj = obj._selected_obj
-    else:
-        selected_obj = obj._obj_with_exclusions
-
-    results = []
-    keys = []
-
-    # degenerate case
-    if selected_obj.ndim == 1:
-        for a in arg:
-            colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
-            try:
-                new_res = colg.aggregate(a)
-
-            except TypeError:
-                pass
-            else:
-                results.append(new_res)
-
-                # make sure we find a good name
-                name = com.get_callable_name(a) or a
-                keys.append(name)
-
-    # multiples
-    else:
-        for index, col in enumerate(selected_obj):
-            colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
-            try:
-                new_res = colg.aggregate(arg)
-            except (TypeError, DataError):
-                pass
-            except ValueError as err:
-                # cannot aggregate
-                if "Must produce aggregated value" in str(err):
-                    # raised directly in _aggregate_named
-                    pass
-                elif "no results" in str(err):
-                    # raised directly in _aggregate_multiple_funcs
-                    pass
-                else:
-                    raise
-            else:
-                results.append(new_res)
-                keys.append(col)
-
-    # if we are empty
-    if not len(results):
-        raise ValueError("no results")
-
-    try:
-        return concat(results, keys=keys, axis=1, sort=False)
-    except TypeError as err:
-
-        # we are concatting non-NDFrame objects,
-        # e.g. a list of scalars
-
-        from pandas import Series
-
-        result = Series(results, index=keys, name=obj.name)
-        if is_nested_object(result):
-            raise ValueError(
-                "cannot combine transform and aggregation operations"
-            ) from err
-        return result
-
-
-def agg_dict_like(
-    obj: AggObjType,
-    arg: AggFuncTypeDict,
-    _axis: int,
-) -> FrameOrSeriesUnion:
-    """
-    Compute aggregation in the case of a dict-like argument.
-
-    Parameters
-    ----------
-    obj : Pandas object to compute aggregation on.
-    arg : dict
-        label-aggregation pairs to compute.
-    _axis : int, 0 or 1
-        Axis to compute aggregation on.
-
-    Returns
-    -------
-    Result of aggregation.
-    """
-    is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
-
-    if _axis != 0:  # pragma: no cover
-        raise ValueError("Can only pass dict with axis=0")
-
-    selected_obj = obj._selected_obj
-
-    # if we have a dict of any non-scalars
-    # eg. {'A' : ['mean']}, normalize all to
-    # be list-likes
-    # Cannot use arg.values() because arg may be a Series
-    if any(is_aggregator(x) for _, x in arg.items()):
-        new_arg: AggFuncTypeDict = {}
-        for k, v in arg.items():
-            if not isinstance(v, (tuple, list, dict)):
-                new_arg[k] = [v]
-            else:
-                new_arg[k] = v
-
-            # the keys must be in the columns
-            # for ndim=2, or renamers for ndim=1
-
-            # ok for now, but deprecated
-            # {'A': { 'ra': 'mean' }}
-            # {'A': { 'ra': ['mean'] }}
-            # {'ra': ['mean']}
-
-            # not ok
-            # {'ra' : { 'A' : 'mean' }}
-            if isinstance(v, dict):
-                raise SpecificationError("nested renamer is not supported")
-            elif isinstance(selected_obj, ABCSeries):
-                raise SpecificationError("nested renamer is not supported")
-            elif (
-                isinstance(selected_obj, ABCDataFrame) and k not in selected_obj.columns
-            ):
-                raise KeyError(f"Column '{k}' does not exist!")
-
-        arg = new_arg
-
-    else:
-        # deprecation of renaming keys
-        # GH 15931
-        keys = list(arg.keys())
-        if isinstance(selected_obj, ABCDataFrame) and len(
-            selected_obj.columns.intersection(keys)
-        ) != len(keys):
-            cols = list(
-                safe_sort(
-                    list(set(keys) - set(selected_obj.columns.intersection(keys))),
-                )
-            )
-            raise SpecificationError(f"Column(s) {cols} do not exist")
-
-    from pandas.core.reshape.concat import concat
-
-    if selected_obj.ndim == 1:
-        # key only used for output
-        colg = obj._gotitem(obj._selection, ndim=1)
-        results = {key: colg.agg(how) for key, how in arg.items()}
-    else:
-        # key used for column selection and output
-        results = {key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()}
-
-    # set the final keys
-    keys = list(arg.keys())
-
-    # Avoid making two isinstance calls in all and any below
-    is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
-
-    # combine results
-    if all(is_ndframe):
-        keys_to_use = [k for k in keys if not results[k].empty]
-        # Have to check, if at least one DataFrame is not empty.
-        keys_to_use = keys_to_use if keys_to_use != [] else keys
-        axis = 0 if isinstance(obj, ABCSeries) else 1
-        result = concat({k: results[k] for k in keys_to_use}, axis=axis)
-    elif any(is_ndframe):
-        # There is a mix of NDFrames and scalars
-        raise ValueError(
-            "cannot perform both aggregation "
-            "and transformation operations "
-            "simultaneously"
-        )
-    else:
-        from pandas import Series
-
-        # we have a dict of scalars
-        # GH 36212 use name only if obj is a series
-        if obj.ndim == 1:
-            obj = cast("Series", obj)
-            name = obj.name
-        else:
-            name = None
-
-        result = Series(results, name=name)
-
-    return result