pandas-dev
diff --git a/‎.pre-commit-config.yaml
Lines changed: 12 additions & 6 deletions b/‎.pre-commit-config.yaml
Lines changed: 12 additions & 6 deletions
diff --git a/‎doc/source/getting_started/install.rst
Lines changed: 22 additions & 22 deletions b/‎doc/source/getting_started/install.rst
Lines changed: 22 additions & 22 deletions
diff --git a/‎doc/source/user_guide/groupby.rst
Lines changed: 11 additions & 0 deletions b/‎doc/source/user_guide/groupby.rst
Lines changed: 11 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v1.4.4.rst
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/v1.4.4.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/whatsnew/v1.5.0.rst
Lines changed: 6 additions & 1 deletion b/‎doc/source/whatsnew/v1.5.0.rst
Lines changed: 6 additions & 1 deletion
diff --git a/‎environment.yml
Lines changed: 1 addition & 1 deletion b/‎environment.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_config/dates.py
Lines changed: 2 additions & 0 deletions b/‎pandas/_config/dates.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/_libs/algos.pyi
Lines changed: 1 addition & 8 deletions b/‎pandas/_libs/algos.pyi
Lines changed: 1 addition & 8 deletions
diff --git a/‎pandas/_libs/groupby.pyi
Lines changed: 13 additions & 3 deletions b/‎pandas/_libs/groupby.pyi
Lines changed: 13 additions & 3 deletions
diff --git a/‎pandas/_libs/internals.pyi
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/internals.pyi
Lines changed: 1 addition & 1 deletion
@@ -94,8 +94,6 @@ repos:
         stages: [manual]
         additional_dependencies: &pyright_dependencies
         - pyright@1.1.258
--   repo: local
-    hooks:
     -   id: pyright_reportGeneralTypeIssues
         name: pyright reportGeneralTypeIssues
         entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json
@@ -105,8 +103,6 @@ repos:
         types: [python]
         stages: [manual]
         additional_dependencies: *pyright_dependencies
--   repo: local
-    hooks:
     -   id: mypy
         name: mypy
         entry: mypy
@@ -115,8 +111,6 @@ repos:
         pass_filenames: false
         types: [python]
         stages: [manual]
--   repo: local
-    hooks:
     -   id: flake8-rst
         name: flake8-rst
         description: Run flake8 on code snippets in docstrings or RST files
@@ -237,3 +231,15 @@ repos:
         additional_dependencies:
         - flake8==4.0.1
         - flake8-pyi==22.5.1
+    -   id: future-annotations
+        name: import annotations from __future__
+        entry: 'from __future__ import annotations'
+        language: pygrep
+        args: [--negate]
+        files: ^pandas/
+        types: [python]
+        exclude: |
+            (?x)
+            /(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$
+            |/tests/
+            |/_testing/
@@ -199,7 +199,7 @@ the code base as of this writing. To run it on your machine to verify that
 everything is working (and that you have all of the dependencies, soft and hard,
 installed), make sure you have `pytest
 <https://docs.pytest.org/en/latest/>`__ >= 6.0 and `Hypothesis
-<https://hypothesis.readthedocs.io/en/latest/>`__ >= 3.58, then run:
+<https://hypothesis.readthedocs.io/en/latest/>`__ >= 6.13.0, then run:
 
 ::
 
@@ -247,11 +247,11 @@ Recommended dependencies
 
 * `numexpr <https://github.com/pydata/numexpr>`__: for accelerating certain numerical operations.
   ``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups.
-  If installed, must be Version 2.7.1 or higher.
+  If installed, must be Version 2.7.3 or higher.
 
 * `bottleneck <https://github.com/pydata/bottleneck>`__: for accelerating certain types of ``nan``
   evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. If installed,
-  must be Version 1.3.1 or higher.
+  must be Version 1.3.2 or higher.
 
 .. note::
 
@@ -277,8 +277,8 @@ Visualization
 Dependency                Minimum Version    Notes
 ========================= ================== =============================================================
 matplotlib                3.3.2              Plotting library
-Jinja2                    2.11               Conditional formatting with DataFrame.style
-tabulate                  0.8.7              Printing in Markdown-friendly format (see `tabulate`_)
+Jinja2                    3.0.0              Conditional formatting with DataFrame.style
+tabulate                  0.8.9              Printing in Markdown-friendly format (see `tabulate`_)
 ========================= ================== =============================================================
 
 Computation
@@ -287,10 +287,10 @@ Computation
 ========================= ================== =============================================================
 Dependency                Minimum Version    Notes
 ========================= ================== =============================================================
-SciPy                     1.4.1              Miscellaneous statistical functions
-numba                     0.50.1             Alternative execution engine for rolling operations
+SciPy                     1.7.1              Miscellaneous statistical functions
+numba                     0.53.1             Alternative execution engine for rolling operations
                                              (see :ref:`Enhancing Performance <enhancingperf.numba>`)
-xarray                    0.15.1             pandas-like API for N-dimensional data
+xarray                    0.19.0             pandas-like API for N-dimensional data
 ========================= ================== =============================================================
 
 Excel files
@@ -301,9 +301,9 @@ Dependency                Minimum Version    Notes
 ========================= ================== =============================================================
 xlrd                      2.0.1              Reading Excel
 xlwt                      1.3.0              Writing Excel
-xlsxwriter                1.2.2              Writing Excel
-openpyxl                  3.0.3              Reading / writing for xlsx files
-pyxlsb                    1.0.6              Reading for xlsb files
+xlsxwriter                1.4.3              Writing Excel
+openpyxl                  3.0.7              Reading / writing for xlsx files
+pyxlsb                    1.0.8              Reading for xlsb files
 ========================= ================== =============================================================
 
 HTML
@@ -312,9 +312,9 @@ HTML
 ========================= ================== =============================================================
 Dependency                Minimum Version    Notes
 ========================= ================== =============================================================
-BeautifulSoup4            4.8.2              HTML parser for read_html
+BeautifulSoup4            4.9.3              HTML parser for read_html
 html5lib                  1.1                HTML parser for read_html
-lxml                      4.5.0              HTML parser for read_html
+lxml                      4.6.3              HTML parser for read_html
 ========================= ================== =============================================================
 
 One of the following combinations of libraries is needed to use the
@@ -356,9 +356,9 @@ SQL databases
 ========================= ================== =============================================================
 Dependency                Minimum Version    Notes
 ========================= ================== =============================================================
-SQLAlchemy                1.4.0               SQL support for databases other than sqlite
-psycopg2                  2.8.4               PostgreSQL engine for sqlalchemy
-pymysql                   0.10.1              MySQL engine for sqlalchemy
+SQLAlchemy                1.4.16             SQL support for databases other than sqlite
+psycopg2                  2.8.6              PostgreSQL engine for sqlalchemy
+pymysql                   1.0.2              MySQL engine for sqlalchemy
 ========================= ================== =============================================================
 
 Other data sources
@@ -368,11 +368,11 @@ Other data sources
 Dependency                Minimum Version    Notes
 ========================= ================== =============================================================
 PyTables                  3.6.1              HDF5-based reading / writing
-blosc                     1.20.1             Compression for HDF5
+blosc                     1.21.0             Compression for HDF5
 zlib                                         Compression for HDF5
 fastparquet               0.4.0              Parquet reading / writing
 pyarrow                   1.0.1              Parquet, ORC, and feather reading / writing
-pyreadstat                1.1.0              SPSS files (.sav) reading
+pyreadstat                1.1.2              SPSS files (.sav) reading
 ========================= ================== =============================================================
 
 .. _install.warn_orc:
@@ -396,10 +396,10 @@ Access data in the cloud
 ========================= ================== =============================================================
 Dependency                Minimum Version    Notes
 ========================= ================== =============================================================
-fsspec                    0.7.4              Handling files aside from simple local and HTTP
-gcsfs                     0.6.0              Google Cloud Storage access
-pandas-gbq                0.14.0             Google Big Query access
-s3fs                      0.4.0              Amazon S3 access
+fsspec                    2021.5.0           Handling files aside from simple local and HTTP
+gcsfs                     2021.5.0           Google Cloud Storage access
+pandas-gbq                0.15.0             Google Big Query access
+s3fs                      2021.05.0          Amazon S3 access
 ========================= ================== =============================================================
 
 Clipboard
 
@@ -345,6 +345,17 @@ Index level names may be supplied as keys.
 
 More on the ``sum`` function and aggregation later.
 
+When using ``.groupby()`` on a DataFrame with a MultiIndex, do not specify both ``by`` and ``level``.
+Instead, refer to the index level by its name in the keys passed to ``.groupby()``, as in the example below.
+
+.. ipython:: python
+
+   df = pd.DataFrame({"col1": ["a", "b", "c"]})
+   df.index = pd.MultiIndex.from_arrays([["a", "a", "b"],
+                                        [1, 2, 1]],
+                                        names=["x", "y"])
+   df.groupby(["col1", "x"])
+
 Grouping DataFrame with Index levels and columns
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 A DataFrame may be grouped by a combination of columns and index levels by
 
@@ -15,6 +15,7 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`)
+- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -278,6 +278,7 @@ Other enhancements
 - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
 - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
 - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`)
+- :meth:`DataFrame.compare` now accepts a ``result_names`` argument that lets the user specify the names used in the result for the left and right DataFrames being compared. The default is ``'self'`` and ``'other'`` (:issue:`44354`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.notable_bug_fixes:
@@ -845,7 +846,7 @@ Numeric
 - Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`)
 - Bug in division, ``pow`` and ``mod`` operations on array-likes with ``dtype="boolean"`` not being like their ``np.bool_`` counterparts (:issue:`46063`)
 - Bug in multiplying a :class:`Series` with ``IntegerDtype`` or ``FloatingDtype`` by an array-like with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`45622`)
--
+- Bug in :meth:`mean` where the optional dependency ``bottleneck`` causes precision loss linear in the length of the array. ``bottleneck`` has been disabled for :meth:`mean`, improving the loss to log-linear, but this may result in a performance decrease. (:issue:`42878`)
 
 Conversion
 ^^^^^^^^^^
@@ -913,6 +914,7 @@ Missing
 ^^^^^^^
 - Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`)
 - Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with :class:`IntervalDtype` and incompatible value raising instead of casting to a common (usually object) dtype (:issue:`45796`)
+- Bug in :meth:`Series.map` not respecting ``na_action`` argument if mapper is a ``dict`` or :class:`Series` (:issue:`47527`)
 - Bug in :meth:`DataFrame.interpolate` with object-dtype column not returning a copy with ``inplace=False`` (:issue:`45791`)
 - Bug in :meth:`DataFrame.dropna` allowing the incompatible arguments ``how`` and ``thresh`` to both be set (:issue:`46575`)
 - Bug in :meth:`DataFrame.fillna` ignored ``axis`` when :class:`DataFrame` is single block (:issue:`47713`)
@@ -955,6 +957,7 @@ I/O
 - Bug in :func:`read_sas` that scrambled column names (:issue:`31243`)
 - Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x00 control bytes (:issue:`47099`)
 - Bug in :func:`read_parquet` with ``use_nullable_dtypes=True`` where ``float64`` dtype was returned instead of nullable ``Float64`` dtype (:issue:`45694`)
+- Bug in :meth:`DataFrame.to_json` where ``PeriodDtype`` would not make the serialization roundtrip when read back with :meth:`read_json` (:issue:`44720`)
 
 Period
 ^^^^^^
@@ -977,6 +980,7 @@ Plotting
 - The function :meth:`DataFrame.plot.scatter` now accepts ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` for consistency to other plotting functions (:issue:`44670`)
 - Fix showing "None" as ylabel in :meth:`Series.plot` when not setting ylabel (:issue:`46129`)
 - Bug in :meth:`DataFrame.plot` that led to xticks and vertical grids being improperly placed when plotting a quarterly series (:issue:`47602`)
+- Bug in :meth:`DataFrame.plot` that prevented setting y-axis label, limits and ticks for a secondary y-axis (:issue:`47753`)
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1018,6 +1022,7 @@ Reshaping
 - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`)
 - Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`)
 - Bug in :meth:`concat` when ``axis=1`` and ``sort=False`` where the resulting Index was a :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`46675`)
+- Bug in :meth:`wide_to_long` raising when ``stubnames`` is missing from the columns and ``i`` contains a string-dtype column (:issue:`46044`)
 
 Sparse
 ^^^^^^
 
@@ -127,4 +127,4 @@ dependencies:
   # build the interactive terminal
   - jupyterlab >=3.4,<4
   - pip:
-      - jupyterlite==0.1.0b9
+      - jupyterlite==0.1.0b10
@@ -1,6 +1,8 @@
 """
 config for datetime formatting
 """
+from __future__ import annotations
+
 from pandas._config import config as cf
 
 pc_date_dayfirst_doc = """
 
@@ -42,7 +42,7 @@ def groupsort_indexer(
     np.ndarray,  # ndarray[int64_t, ndim=1]
 ]: ...
 def kth_smallest(
-    a: np.ndarray,  # numeric[:]
+    arr: np.ndarray,  # numeric[:]
     k: int,
 ) -> Any: ...  # numeric
 
@@ -129,18 +129,11 @@ def diff_2d(
 ) -> None: ...
 def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ...
 def ensure_object(arr: object) -> npt.NDArray[np.object_]: ...
-def ensure_complex64(arr: object, copy=...) -> npt.NDArray[np.complex64]: ...
-def ensure_complex128(arr: object, copy=...) -> npt.NDArray[np.complex128]: ...
 def ensure_float64(arr: object, copy=...) -> npt.NDArray[np.float64]: ...
-def ensure_float32(arr: object, copy=...) -> npt.NDArray[np.float32]: ...
 def ensure_int8(arr: object, copy=...) -> npt.NDArray[np.int8]: ...
 def ensure_int16(arr: object, copy=...) -> npt.NDArray[np.int16]: ...
 def ensure_int32(arr: object, copy=...) -> npt.NDArray[np.int32]: ...
 def ensure_int64(arr: object, copy=...) -> npt.NDArray[np.int64]: ...
-def ensure_uint8(arr: object, copy=...) -> npt.NDArray[np.uint8]: ...
-def ensure_uint16(arr: object, copy=...) -> npt.NDArray[np.uint16]: ...
-def ensure_uint32(arr: object, copy=...) -> npt.NDArray[np.uint32]: ...
-def ensure_uint64(arr: object, copy=...) -> npt.NDArray[np.uint64]: ...
 def take_1d_int8_int8(
     values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
 ) -> None: ...
 
@@ -105,26 +105,28 @@ def group_last(
     values: np.ndarray,  # ndarray[rank_t, ndim=2]
     labels: np.ndarray,  # const int64_t[:]
     mask: npt.NDArray[np.bool_] | None,
-    result_mask: npt.NDArray[np.bool_] | None,
+    result_mask: npt.NDArray[np.bool_] | None = ...,
     min_count: int = ...,  # Py_ssize_t
+    is_datetimelike: bool = ...,
 ) -> None: ...
 def group_nth(
     out: np.ndarray,  # rank_t[:, ::1]
     counts: np.ndarray,  # int64_t[::1]
     values: np.ndarray,  # ndarray[rank_t, ndim=2]
     labels: np.ndarray,  # const int64_t[:]
     mask: npt.NDArray[np.bool_] | None,
-    result_mask: npt.NDArray[np.bool_] | None,
+    result_mask: npt.NDArray[np.bool_] | None = ...,
     min_count: int = ...,  # int64_t
     rank: int = ...,  # int64_t
+    is_datetimelike: bool = ...,
 ) -> None: ...
 def group_rank(
     out: np.ndarray,  # float64_t[:, ::1]
     values: np.ndarray,  # ndarray[rank_t, ndim=2]
     labels: np.ndarray,  # const int64_t[:]
     ngroups: int,
     is_datetimelike: bool,
-    ties_method: Literal["aveage", "min", "max", "first", "dense"] = ...,
+    ties_method: Literal["average", "min", "max", "first", "dense"] = ...,
     ascending: bool = ...,
     pct: bool = ...,
     na_option: Literal["keep", "top", "bottom"] = ...,
@@ -136,6 +138,7 @@ def group_max(
     values: np.ndarray,  # ndarray[groupby_t, ndim=2]
     labels: np.ndarray,  # const int64_t[:]
     min_count: int = ...,
+    is_datetimelike: bool = ...,
     mask: np.ndarray | None = ...,
     result_mask: np.ndarray | None = ...,
 ) -> None: ...
@@ -145,6 +148,7 @@ def group_min(
     values: np.ndarray,  # ndarray[groupby_t, ndim=2]
     labels: np.ndarray,  # const int64_t[:]
     min_count: int = ...,
+    is_datetimelike: bool = ...,
     mask: np.ndarray | None = ...,
     result_mask: np.ndarray | None = ...,
 ) -> None: ...
@@ -154,11 +158,17 @@ def group_cummin(
     labels: np.ndarray,  # const int64_t[:]
     ngroups: int,
     is_datetimelike: bool,
+    mask: np.ndarray | None = ...,
+    result_mask: np.ndarray | None = ...,
+    skipna: bool = ...,
 ) -> None: ...
 def group_cummax(
     out: np.ndarray,  # groupby_t[:, ::1]
     values: np.ndarray,  # ndarray[groupby_t, ndim=2]
     labels: np.ndarray,  # const int64_t[:]
     ngroups: int,
     is_datetimelike: bool,
+    mask: np.ndarray | None = ...,
+    result_mask: np.ndarray | None = ...,
+    skipna: bool = ...,
 ) -> None: ...
@@ -32,7 +32,7 @@ def update_blklocs_and_blknos(
     loc: int,
     nblocks: int,
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
-
+@final
 class BlockPlacement:
     def __init__(self, val: int | slice | np.ndarray): ...
     @property
Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,7 @@ including other versions of pandas.`
`15`	`15`	`Fixed regressions`
`16`	`16`	`~~~~~~~~~~~~~~~~~`
`17`	`17`	- Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`)
	`18`	+- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
`18`	`19`	`-`
`19`	`20`
`20`	`21`	`.. ---------------------------------------------------------------------------`