
Commit 785f4a4

Merge branch 'main' into groupby_describe_empty_dataset

Authored by Khor Chean Wei
2 parents: 6a3cc67 + 8647298

85 files changed, +1626 -542 lines. (Large commit; only a subset of the changed files is shown below.)

.github/workflows/posix.yml

Lines changed: 10 additions & 1 deletion

@@ -28,7 +28,7 @@ jobs:
         pattern: ["not single_cpu", "single_cpu"]
         # Don't test pyarrow v2/3: Causes timeouts in read_csv engine
         # even if tests are skipped/xfailed
-        pyarrow_version: ["5", "7"]
+        pyarrow_version: ["5", "6", "7"]
         include:
           - name: "Downstream Compat"
             env_file: actions-38-downstream_compat.yaml
@@ -62,6 +62,15 @@ jobs:
             pattern: "not slow and not network and not single_cpu"
             pandas_testing_mode: "deprecate"
             test_args: "-W error::DeprecationWarning:numpy"
+        exclude:
+          - env_file: actions-39.yaml
+            pyarrow_version: "6"
+          - env_file: actions-39.yaml
+            pyarrow_version: "7"
+          - env_file: actions-310.yaml
+            pyarrow_version: "6"
+          - env_file: actions-310.yaml
+            pyarrow_version: "7"
       fail-fast: false
     name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }}
     env:
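
Reviewer note: the new exclude block prunes combinations from the expanded job matrix rather than listing every allowed pair. A rough Python sketch of the expansion, assuming three env_file values for illustration (the real workflow defines more axes, e.g. pattern):

    from itertools import product

    # Illustrative axis values only; the workflow's actual env_file list is longer.
    env_files = ["actions-38.yaml", "actions-39.yaml", "actions-310.yaml"]
    pyarrow_versions = ["5", "6", "7"]

    # Pairs removed by the exclude block added above.
    excluded = {
        ("actions-39.yaml", "6"), ("actions-39.yaml", "7"),
        ("actions-310.yaml", "6"), ("actions-310.yaml", "7"),
    }

    jobs = [(env, pa) for env, pa in product(env_files, pyarrow_versions)
            if (env, pa) not in excluded]
    # With these values, pyarrow 6 and 7 jobs run only against actions-38.yaml.
    print(jobs)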

doc/source/user_guide/cookbook.rst

Lines changed: 1 addition & 1 deletion

@@ -511,7 +511,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to

    def replace(g):
        mask = g < 0
-       return g.where(mask, g[~mask].mean())
+       return g.where(~mask, g[~mask].mean())

    gb.transform(replace)
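
Reviewer note: Series.where(cond, other) keeps values where cond is True and substitutes other where it is False. With mask = g < 0, the old code therefore kept the negatives and overwrote everything else; the fixed version replaces only the negatives with the mean of the rest. A quick sanity check with made-up data:

    import pandas as pd

    g = pd.Series([-1, 2, 4])
    mask = g < 0

    # Buggy form: keeps the negatives, overwrites the rest.
    print(g.where(mask, g[~mask].mean()).tolist())   # [-1.0, 3.0, 3.0]

    # Fixed form: replaces only the negatives with the non-negative mean.
    print(g.where(~mask, g[~mask].mean()).tolist())  # [3.0, 2.0, 4.0]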

doc/source/whatsnew/v1.5.0.rst

Lines changed: 111 additions & 4 deletions

@@ -100,6 +100,31 @@ as seen in the following example.
 1 2021-01-02 08:00:00           4
 2 2021-01-02 16:00:00           5

+.. _whatsnew_150.enhancements.tar:
+
+Reading directly from TAR archives
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+I/O methods like :func:`read_csv` or :meth:`DataFrame.to_json` now allow reading and writing
+directly on TAR archives (:issue:`44787`).
+
+.. code-block:: python
+
+    df = pd.read_csv("./movement.tar.gz")
+    # ...
+    df.to_csv("./out.tar.gz")
+
+This supports ``.tar``, ``.tar.gz``, ``.tar.bz2`` and ``.tar.xz`` archives.
+The compression method used is inferred from the filename.
+If the compression method cannot be inferred, use the ``compression`` argument:
+
+.. code-block:: python
+
+    df = pd.read_csv(some_file_obj, compression={"method": "tar", "mode": "r:gz"})  # noqa F821
+
+(``mode`` being one of ``tarfile.open``'s modes: https://docs.python.org/3/library/tarfile.html#tarfile.open)
+
 .. _whatsnew_150.enhancements.other:

 Other enhancements
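
Reviewer note: a hedged end-to-end sketch of the TAR support added above (file names invented; assumes pandas with this change installed):

    import pandas as pd

    df = pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]})

    # tar + gzip compression is inferred from the ".tar.gz" suffix.
    df.to_csv("data.tar.gz", index=False)
    assert pd.read_csv("data.tar.gz").equals(df)

    # A bare file object carries no suffix, so spell the method out.
    with open("data.tar.gz", "rb") as fh:
        df2 = pd.read_csv(fh, compression={"method": "tar", "mode": "r:gz"})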
@@ -120,7 +145,7 @@ Other enhancements
 - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`)
 - :meth:`pd.concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`)
 - :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
-- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.GroupBy.quantile` (:issue:`46560`)
+- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.DataFrameGroupBy.quantile` (:issue:`46560`)
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`, :issue:`46725`)
 - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`)
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
@@ -194,10 +219,47 @@ did not have the same index as the input.
     df.groupby('a', dropna=True).transform('ffill')
     df.groupby('a', dropna=True).transform(lambda x: x)

-.. _whatsnew_150.notable_bug_fixes.notable_bug_fix2:
+.. _whatsnew_150.notable_bug_fixes.to_json_incorrectly_localizing_naive_timestamps:

-notable_bug_fix2
-^^^^^^^^^^^^^^^^
+Serializing tz-naive Timestamps with to_json() with ``iso_dates=True``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:meth:`DataFrame.to_json`, :meth:`Series.to_json`, and :meth:`Index.to_json`
+would incorrectly localize DatetimeArrays/DatetimeIndexes with tz-naive Timestamps
+to UTC. (:issue:`38760`)
+
+Note that this patch does not fix the localization of tz-aware Timestamps to UTC
+upon serialization. (Related issue :issue:`12997`)
+
+*Old Behavior*
+
+.. ipython:: python
+
+    index = pd.date_range(
+        start='2020-12-28 00:00:00',
+        end='2020-12-28 02:00:00',
+        freq='1H',
+    )
+    a = pd.Series(
+        data=range(3),
+        index=index,
+    )
+
+.. code-block:: ipython
+
+    In [4]: a.to_json(date_format='iso')
+    Out[4]: '{"2020-12-28T00:00:00.000Z":0,"2020-12-28T01:00:00.000Z":1,"2020-12-28T02:00:00.000Z":2}'
+
+    In [5]: pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
+    Out[5]: array([False, False, False])
+
+*New Behavior*
+
+.. ipython:: python
+
+    a.to_json(date_format='iso')
+    # Roundtripping now works
+    pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index

 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.api_breaking:
@@ -426,6 +488,48 @@ As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and
 raise a ``FutureWarning``. This can be silenced and the previous behavior
 retained by specifying ``group_keys=False``.

+.. _whatsnew_150.deprecations.numeric_only_default:
+
+``numeric_only`` default value
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Across DataFrame operations such as ``min``, ``sum``, and ``idxmax``, the default
+value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
+Furthermore, operations with the default value ``None`` can lead to surprising
+results. (:issue:`46560`)
+
+.. code-block:: ipython
+
+    In [1]: df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
+
+    In [2]: # Reading the next line without knowing the contents of df, one would
+            # expect the result to contain the products for both columns a and b.
+            df[["a", "b"]].prod()
+    Out[2]:
+    a    2
+    dtype: int64
+
+To avoid this behavior, specifying the value ``numeric_only=None`` has been
+deprecated, and will be removed in a future version of pandas. In the future,
+all operations with a ``numeric_only`` argument will default to ``False``. Users
+should either call the operation only with columns that can be operated on, or
+specify ``numeric_only=True`` to operate only on Boolean, integer, and float columns.
+
+In order to support the transition to the new behavior, the following methods have
+gained the ``numeric_only`` argument.
+
+- :meth:`DataFrame.corr`
+- :meth:`DataFrame.corrwith`
+- :meth:`DataFrame.cov`
+- :meth:`DataFrame.idxmin`
+- :meth:`DataFrame.idxmax`
+- :meth:`.DataFrameGroupBy.idxmin`
+- :meth:`.DataFrameGroupBy.idxmax`
+- :meth:`.GroupBy.var`
+- :meth:`.GroupBy.std`
+- :meth:`.GroupBy.sem`
+- :meth:`.DataFrameGroupBy.quantile`
+
 .. _whatsnew_150.deprecations.other:

 Other Deprecations
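
Reviewer note: a small sketch of the migration path the section above recommends (illustrative frame; behavior as described in the deprecation note):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})

    # Explicit opt-in: only Boolean/integer/float columns participate, so
    # the silent dropping of column "b" is no longer a surprise.
    print(df.prod(numeric_only=True))  # a    2

    # Or select only the columns you actually mean to aggregate.
    print(df[["a"]].prod())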
@@ -448,6 +552,7 @@ Other Deprecations
 - Deprecated passing arguments as positional in :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44802`)
 - Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
 - Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`)
+- Deprecated positional arguments to :meth:`Index.join` except for ``other``, use keyword-only arguments instead of positional arguments (:issue:`46518`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.performance:
@@ -629,8 +734,10 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.var` and :meth:`Rolling.std` would give non-zero result with window of same values (:issue:`42064`)
 - Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
 - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
+- Bug in :meth:`DataFrame.rolling` giving a ``ValueError`` when ``center=True``, ``axis=1`` and ``win_type`` is specified (:issue:`46135`)
 - Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`)

+
 Reshaping
 ^^^^^^^^^
 - Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
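
Reviewer note: the describe bullet above is the subject of this merge branch (groupby_describe_empty_dataset). A minimal reproducer of the case it targets, per the changelog entry (exact output aside, the DataFrameGroupBy and SeriesGroupBy results should now be consistent):

    import pandas as pd

    df = pd.DataFrame({"a": [], "b": []}, dtype="float64")

    # describe() on an empty dataset, frame-wise and series-wise.
    print(df.groupby("a").describe())
    print(df.groupby("a")["b"].describe())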

pandas/__init__.py

Lines changed: 13 additions & 13 deletions

@@ -3,33 +3,33 @@
 __docformat__ = "restructuredtext"

 # Let users know if they're missing any of our hard dependencies
-hard_dependencies = ("numpy", "pytz", "dateutil")
-missing_dependencies = []
+_hard_dependencies = ("numpy", "pytz", "dateutil")
+_missing_dependencies = []

-for dependency in hard_dependencies:
+for _dependency in _hard_dependencies:
     try:
-        __import__(dependency)
-    except ImportError as e:
-        missing_dependencies.append(f"{dependency}: {e}")
+        __import__(_dependency)
+    except ImportError as _e:
+        _missing_dependencies.append(f"{_dependency}: {_e}")

-if missing_dependencies:
+if _missing_dependencies:
     raise ImportError(
-        "Unable to import required dependencies:\n" + "\n".join(missing_dependencies)
+        "Unable to import required dependencies:\n" + "\n".join(_missing_dependencies)
     )
-del hard_dependencies, dependency, missing_dependencies
+del _hard_dependencies, _dependency, _missing_dependencies

 # numpy compat
 from pandas.compat import is_numpy_dev as _is_numpy_dev

 try:
     from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
-except ImportError as err:  # pragma: no cover
-    module = err.name
+except ImportError as _err:  # pragma: no cover
+    _module = _err.name
     raise ImportError(
-        f"C extension: {module} not built. If you want to import "
+        f"C extension: {_module} not built. If you want to import "
         "pandas from the source directory, you may need to run "
         "'python setup.py build_ext --force' to build the C extensions first."
-    ) from err
+    ) from _err
 else:
     del _tslib, _lib, _hashtable
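
Reviewer note: the renames are mechanical. Underscore-prefixing the import-time helpers keeps them out of pandas' public namespace (and tab completion), including on error paths where the trailing del never runs. A quick check against an installed build:

    import pandas as pd

    # None of the import-time scaffolding should leak as a public attribute.
    leaked = [name for name in ("hard_dependencies", "dependency",
                                "missing_dependencies", "module")
              if hasattr(pd, name)]
    assert leaked == [], leaked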

pandas/_libs/algos.pyi

Lines changed: 1 addition & 0 deletions

@@ -109,6 +109,7 @@ def rank_1d(
     ascending: bool = ...,
     pct: bool = ...,
     na_option=...,
+    mask: npt.NDArray[np.bool_] | None = ...,
 ) -> np.ndarray: ...  # np.ndarray[float64_t, ndim=1]
 def rank_2d(
     in_arr: np.ndarray,  # ndarray[numeric_object_t, ndim=2]

pandas/_libs/algos.pyx

Lines changed: 7 additions & 2 deletions

@@ -889,6 +889,7 @@ def rank_1d(
     bint ascending=True,
     bint pct=False,
     na_option="keep",
+    const uint8_t[:] mask=None,
 ):
     """
     Fast NaN-friendly version of ``scipy.stats.rankdata``.
@@ -918,6 +919,8 @@ def rank_1d(
         * keep: leave NA values where they are
         * top: smallest rank if ascending
         * bottom: smallest rank if descending
+    mask : np.ndarray[bool], optional, default None
+        Specify locations to be treated as NA, e.g. for Categorical.
     """
     cdef:
         TiebreakEnumType tiebreak
@@ -927,7 +930,6 @@ def rank_1d(
         float64_t[::1] out
         ndarray[numeric_object_t, ndim=1] masked_vals
         numeric_object_t[:] masked_vals_memview
-        uint8_t[:] mask
         bint keep_na, nans_rank_highest, check_labels, check_mask
         numeric_object_t nan_fill_val

@@ -956,6 +958,7 @@ def rank_1d(
         or numeric_object_t is object
         or (numeric_object_t is int64_t and is_datetimelike)
     )
+    check_mask = check_mask or mask is not None

     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
@@ -965,7 +968,9 @@ def rank_1d(
     else:
         masked_vals = values.copy()

-    if numeric_object_t is object:
+    if mask is not None:
+        pass
+    elif numeric_object_t is object:
         mask = missing.isnaobj(masked_vals)
     elif numeric_object_t is int64_t and is_datetimelike:
         mask = (masked_vals == NPY_NAT).astype(np.uint8)
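
Reviewer note: rank_1d is internal, but the semantics of the new mask argument can be mimicked with the public API: positions flagged by the mask rank as missing regardless of the value the backing array holds (for example, the sentinel slots behind a masked or Categorical array). An illustrative sketch:

    import numpy as np
    import pandas as pd

    values = np.array([3, 1, 2, 9], dtype="int64")
    # Treat the last slot as NA even though it stores a real integer.
    mask = np.array([False, False, False, True])

    ranks = pd.Series(values).mask(mask).rank()  # na_option="keep" equivalent
    print(ranks.tolist())  # [3.0, 1.0, 2.0, nan]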

pandas/_libs/groupby.pyi

Lines changed: 1 addition & 0 deletions

@@ -128,6 +128,7 @@ def group_rank(
     ascending: bool = ...,
     pct: bool = ...,
     na_option: Literal["keep", "top", "bottom"] = ...,
+    mask: npt.NDArray[np.bool_] | None = ...,
 ) -> None: ...
 def group_max(
     out: np.ndarray,  # groupby_t[:, ::1]

pandas/_libs/groupby.pyx

Lines changed: 12 additions & 3 deletions

@@ -1262,6 +1262,7 @@ def group_rank(
     bint ascending=True,
     bint pct=False,
     str na_option="keep",
+    const uint8_t[:, :] mask=None,
 ) -> None:
     """
     Provides the rank of values within each group.
@@ -1294,6 +1295,7 @@ def group_rank(
         * keep: leave NA values where they are
         * top: smallest rank if ascending
         * bottom: smallest rank if descending
+    mask : np.ndarray[bool] or None, default None

     Notes
     -----
@@ -1302,22 +1304,29 @@ def group_rank(
     cdef:
         Py_ssize_t i, k, N
         ndarray[float64_t, ndim=1] result
+        const uint8_t[:] sub_mask

     N = values.shape[1]

     for k in range(N):
+        if mask is None:
+            sub_mask = None
+        else:
+            sub_mask = mask[:, k]
+
         result = rank_1d(
             values=values[:, k],
             labels=labels,
             is_datetimelike=is_datetimelike,
             ties_method=ties_method,
             ascending=ascending,
             pct=pct,
-            na_option=na_option
+            na_option=na_option,
+            mask=sub_mask,
         )
         for i in range(len(result)):
-            # TODO: why can't we do out[:, k] = result?
-            out[i, k] = result[i]
+            if labels[i] >= 0:
+                out[i, k] = result[i]


 # ----------------------------------------------------------------------
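
Reviewer note: two behavior changes in this hunk. Each column now forwards its own mask slice into rank_1d, and rows whose group label is -1 (NA keys under dropna=True) are no longer written into out, replacing the old unconditional copy. In public-API terms, roughly (illustrative frame; the nullable Int64 column is what supplies a mask):

    import pandas as pd

    df = pd.DataFrame({
        "key": ["a", "a", None, "b"],
        "val": pd.array([10, 20, 30, 40], dtype="Int64"),  # masked dtype
    })

    # The row with a null key gets a missing rank, not a stale value.
    print(df.groupby("key", dropna=True)["val"].rank())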
