
CLN: Remove MultiIndex._get_grouper_for_level #49597


Closed

Changes from all commits (39 commits)

b08af20  CLN: deleted function (codamuse, Nov 9, 2022)
57b212b  raise notImplementedError (codamuse, Nov 10, 2022)
42c897d  check for _is_multi in base class and @final _get_grouper_by_level (codamuse, Nov 10, 2022)
1d9f33e  tweak NotImplementedError message (codamuse, Nov 10, 2022)
8677352  STYLE: fix pylint useless-else-on-loop warnings (#49595) (Moisan, Nov 9, 2022)
5cbe002  STYLE enable pylint: chained-comparison (#49586) (natmokval, Nov 9, 2022)
6c0d3fb  PERF: df.groupby(categorical) (#49596) (lukemanley, Nov 9, 2022)
bbeef69  REF: tighter typing in constructor functions (#49591) (jbrockmendel, Nov 9, 2022)
9236857  CLN: assorted (#49590) (jbrockmendel, Nov 9, 2022)
63a9350  API: Index(object_dtype_bool_ndarray) retain object dtype (#49594) (jbrockmendel, Nov 9, 2022)
9adaf8c  BUG: Series(index=[]) should have dtype=object (#49574) (topper-123, Nov 9, 2022)
8e0aa38  CLN: collect fastpath in Series.__init__ (#49575) (topper-123, Nov 9, 2022)
3d92ac3  BUG/PERF: MultiIndex.value_counts returning flat index (#49558) (lukemanley, Nov 9, 2022)
a366e83  CI: Change flaky to_excel test to compare DataFrames (#49509) (mroeschke, Nov 9, 2022)
5a0fa14  STYLE Enable Pylint statement import-self (#49601) (uzzell, Nov 9, 2022)
f9dc98b  TST: MultiIndex.get_indexer with na/missing (#48877) (lukemanley, Nov 9, 2022)
f7f0617  DEPR: Enforce DataFrame(list_with_categorical) deprecation (#49592) (jbrockmendel, Nov 9, 2022)
a93ec96  DEPR: Enforce deprecation of numeric_only=None in DataFrame aggregati… (rhshadrach, Nov 9, 2022)
8514362  add/timedeltas-seconds-documentation (#49584) (joaopmjm, Nov 10, 2022)
2088f0e  REF: remove infer_datetimelike_array (#49608) (jbrockmendel, Nov 10, 2022)
22497ef  DEPR: Enforce deprecations in indexes/datetimes.py (#49607) (mroeschke, Nov 10, 2022)
d05207a  TST/CI: Follow up fix test_write_fspath_all (#49621) (mroeschke, Nov 10, 2022)
74cd050  CLN: test_nanops.py (#49423) (mroeschke, Nov 10, 2022)
0533e09  REGR: Better warning in pivot_table when dropping nuisance columns (#… (rhshadrach, Nov 10, 2022)
9d15690  REGR: MultiIndex.join does not work for ea dtypes (#49284) (phofl, Nov 10, 2022)
a835fba  BUG: groupby with sort=False still sorts an ordered categorical (#49613) (rhshadrach, Nov 10, 2022)
07dba4f  BUG: date_range with freq="C" (business days) return value changed on… (douglaslohmann, Nov 10, 2022)
0daeb6a  API: make Timestamp/Timedelta _as_unit public as_unit (#48819) (jbrockmendel, Nov 10, 2022)
678b46a  DEPR: Enforce Series(float_with_nan, dtype=inty) (#49605) (jbrockmendel, Nov 10, 2022)
56cef58  DEPR: Disallow missing nested label when indexing MultiIndex level (#… (mroeschke, Nov 11, 2022)
36936a3  BUG: groupby.nth should be a filter (#49262) (rhshadrach, Nov 11, 2022)
0c55e18  CI: Updating website sync to new server (#49614) (datapythonista, Nov 11, 2022)
4cd9b6a  for #49638 updated the doc (#49639) (ramvikrams, Nov 11, 2022)
72b92d3  for gh-49508 changing Doc for DataFrame.astype (#49556) (ramvikrams, Nov 11, 2022)
f52331f  DEPR: Remove df.reduction(level) (#49611) (mroeschke, Nov 11, 2022)
a38a34f  DEPR: Enforce default of numeric_only=False in DataFrame methods (#49… (rhshadrach, Nov 11, 2022)
0541b89  STYLE: fix pylint reimported warnings (#49645) (Moisan, Nov 11, 2022)
133c5f0  remove vestiges of MultiIndex grouper (codamuse, Nov 12, 2022)
338b8f6  revert broader changes on CI fails (codamuse, Nov 12, 2022)

8 changes: 4 additions & 4 deletions .github/workflows/docbuild-and-upload.yml
@@ -64,22 +64,22 @@ jobs:
mkdir -m 700 -p ~/.ssh
echo "${{ secrets.server_ssh_key }}" > ~/.ssh/id_rsa
chmod 600 ~/.ssh/id_rsa
echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBE1Kkopomm7FHG5enATf7SgnpICZ4W2bw+Ho+afqin+w7sMcrsa0je7sbztFAV8YchDkiBKnWTG4cRT+KZgZCaY=" > ~/.ssh/known_hosts
echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFjYkJBk7sos+r7yATODogQc3jUdW1aascGpyOD4bohj8dWjzwLJv/OJ/fyOQ5lmj81WKDk67tGtqNJYGL9acII=" > ~/.ssh/known_hosts
if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/'))

- name: Copy cheatsheets into site directory
run: cp doc/cheatsheet/Pandas_Cheat_Sheet* web/build/

- name: Upload web
run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' web/build/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas
run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' web/build/ web@${{ secrets.server_ip }}:/var/www/html
if: github.event_name == 'push' && github.ref == 'refs/heads/main'

- name: Upload dev docs
run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/dev
run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/dev
if: github.event_name == 'push' && github.ref == 'refs/heads/main'

- name: Upload prod docs
run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/version/${GITHUB_REF_NAME:1}
run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/version/${GITHUB_REF_NAME:1}
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')

- name: Move docs into site directory

4 changes: 2 additions & 2 deletions asv_bench/benchmarks/frame_methods.py
@@ -454,10 +454,10 @@ def setup(self, axis):
)

def time_count_level_multi(self, axis):
self.df.count(axis=axis, level=1)
self.df.count(axis=axis)

def time_count_level_mixed_dtypes_multi(self, axis):
self.df_mixed.count(axis=axis, level=1)
self.df_mixed.count(axis=axis)


class Apply:

36 changes: 20 additions & 16 deletions asv_bench/benchmarks/groupby.py
@@ -600,31 +600,35 @@ def time_frame_agg(self, dtype, method):


class Cumulative:
param_names = ["dtype", "method"]
param_names = ["dtype", "method", "with_nans"]
params = [
["float64", "int64", "Float64", "Int64"],
["cummin", "cummax", "cumsum"],
[True, False],
]

def setup(self, dtype, method):
def setup(self, dtype, method, with_nans):
if with_nans and dtype == "int64":
raise NotImplementedError("Construction of df would raise")

N = 500_000
vals = np.random.randint(-10, 10, (N, 5))
null_vals = vals.astype(float, copy=True)
null_vals[::2, :] = np.nan
null_vals[::3, :] = np.nan
df = DataFrame(vals, columns=list("abcde"), dtype=dtype)
null_df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
keys = np.random.randint(0, 100, size=N)
df["key"] = keys
null_df["key"] = keys
self.df = df
self.null_df = null_df
vals = np.random.randint(-10, 10, (N, 5))

def time_frame_transform(self, dtype, method):
self.df.groupby("key").transform(method)
if with_nans:
null_vals = vals.astype(float, copy=True)
null_vals[::2, :] = np.nan
null_vals[::3, :] = np.nan
df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
df["key"] = keys
self.df = df
else:
df = DataFrame(vals, columns=list("abcde")).astype(dtype, copy=False)
df["key"] = keys
self.df = df

def time_frame_transform_many_nulls(self, dtype, method):
self.null_df.groupby("key").transform(method)
def time_frame_transform(self, dtype, method, with_nans):
self.df.groupby("key").transform(method)


class RankWithTies:

20 changes: 10 additions & 10 deletions asv_bench/benchmarks/stat_ops.py
@@ -23,10 +23,10 @@ def time_op(self, op, dtype, axis):

class FrameMultiIndexOps:

params = ([0, 1, [0, 1]], ops)
param_names = ["level", "op"]
params = [ops]
param_names = ["op"]

def setup(self, level, op):
def setup(self, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
codes = [
np.arange(10).repeat(10000),
@@ -37,8 +37,8 @@ setup(self, level, op):
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
self.df_func = getattr(df, op)

def time_op(self, level, op):
self.df_func(level=level)
def time_op(self, op):
self.df_func()


class SeriesOps:
@@ -56,10 +56,10 @@ def time_op(self, op, dtype):

class SeriesMultiIndexOps:

params = ([0, 1, [0, 1]], ops)
param_names = ["level", "op"]
params = [ops]
param_names = ["op"]

def setup(self, level, op):
def setup(self, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
codes = [
np.arange(10).repeat(10000),
@@ -70,8 +70,8 @@ setup(self, level, op):
s = pd.Series(np.random.randn(len(index)), index=index)
self.s_func = getattr(s, op)

def time_op(self, level, op):
self.s_func(level=level)
def time_op(self, op):
self.s_func()


class Rank:

2 changes: 1 addition & 1 deletion asv_bench/benchmarks/tslibs/tslib.py
@@ -51,7 +51,7 @@ class TimeIntsToPydatetime:
_tzs,
)
param_names = ["box", "size", "tz"]
# TODO: fold? freq?
# TODO: fold?

def setup(self, box, size, tz):
if box == "date" and tz is not None:

1 change: 1 addition & 0 deletions doc/source/development/contributing.rst
@@ -137,6 +137,7 @@ want to clone your fork to your machine::
git clone https://github.com/your-user-name/pandas.git pandas-yourname
cd pandas-yourname
git remote add upstream https://github.com/pandas-dev/pandas.git
git fetch upstream

This creates the directory ``pandas-yourname`` and connects your repository to
the upstream (main project) *pandas* repository.

4 changes: 4 additions & 0 deletions doc/source/reference/arrays.rst
@@ -139,6 +139,7 @@ Properties
Timestamp.second
Timestamp.tz
Timestamp.tzinfo
Timestamp.unit
Timestamp.value
Timestamp.week
Timestamp.weekofyear
@@ -149,6 +150,7 @@ Methods
.. autosummary::
:toctree: api/

Timestamp.as_unit
Timestamp.astimezone
Timestamp.ceil
Timestamp.combine
@@ -242,6 +244,7 @@ Properties
Timedelta.nanoseconds
Timedelta.resolution
Timedelta.seconds
Timedelta.unit
Timedelta.value
Timedelta.view

@@ -250,6 +253,7 @@ Methods
.. autosummary::
:toctree: api/

Timedelta.as_unit
Timedelta.ceil
Timedelta.floor
Timedelta.isoformat
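
The ``Timestamp.unit``/``Timestamp.as_unit`` and ``Timedelta.unit``/``Timedelta.as_unit`` entries added above come from commit 0daeb6a (#48819), which makes the non-nanosecond resolution API public. A minimal sketch of how the new attributes are used; the values below are illustrative and not taken from this diff:

    import pandas as pd

    ts = pd.Timestamp("2022-11-10 12:00:00")
    ts_ms = ts.as_unit("ms")   # convert the stored resolution to milliseconds
    print(ts_ms.unit)          # "ms"

    td = pd.Timedelta(days=1).as_unit("s")
    print(td.unit)             # "s"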

38 changes: 24 additions & 14 deletions doc/source/user_guide/groupby.rst
@@ -1354,9 +1354,14 @@ This shows the first or last n rows from each group.
Taking the nth row of each group
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

To select from a DataFrame or Series the nth item, use
:meth:`~pd.core.groupby.DataFrameGroupBy.nth`. This is a reduction method, and
will return a single row (or no row) per group if you pass an int for n:
To select the nth item from each group, use :meth:`.DataFrameGroupBy.nth` or
:meth:`.SeriesGroupBy.nth`. The argument supplied can be an integer, a list of integers,
a slice, or a list of slices; see below for examples. When the nth element of a group
does not exist, an error is *not* raised; instead, no corresponding rows are returned.

In general this operation acts as a filtration. In certain cases it also returns
one row per group, which would make it a reduction as well; however, because it can
return zero or multiple rows per group, pandas treats it as a filtration in all cases.

.. ipython:: python

@@ -1367,6 +1372,14 @@ will return a single row (or no row) per group if you pass an int for n:
g.nth(-1)
g.nth(1)

If the nth element of a group does not exist, then no corresponding row is included
in the result. In particular, if the specified ``n`` is larger than any group, the
result will be an empty DataFrame.

.. ipython:: python

g.nth(5)
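
A minimal sketch of the filtration semantics described above and enforced by commit 36936a3 (#49262); the frame below is illustrative and not taken from this diff. ``nth`` keeps the original row index, whereas a true reduction such as ``first`` re-indexes by the group keys:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"A": [1, 1, 2], "B": [np.nan, 4, 5]})
    g = df.groupby("A")

    print(g.nth(0))    # rows at position 0 within each group; original index (0, 2) is kept
    print(g.first())   # first non-null value per group, indexed by the group key "A"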

If you want to select the nth not-null item, use the ``dropna`` kwarg. For a DataFrame this should be either ``'any'`` or ``'all'`` just like you would pass to dropna:

.. ipython:: python
@@ -1376,21 +1389,11 @@ If you want to select the nth not-null item, use the ``dropna`` kwarg. For a Dat
g.first()

# nth(-1) is the same as g.last()
g.nth(-1, dropna="any") # NaNs denote group exhausted when using dropna
g.nth(-1, dropna="any")
g.last()

g.B.nth(0, dropna="all")

As with other methods, passing ``as_index=False``, will achieve a filtration, which returns the grouped row.

.. ipython:: python

df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"])
g = df.groupby("A", as_index=False)

g.nth(0)
g.nth(-1)

You can also select multiple rows from each group by specifying multiple nth values as a list of ints.

.. ipython:: python
@@ -1400,6 +1403,13 @@ You can also select multiple rows from each group by specifying multiple nth val
# get the first, 4th, and last date index for each month
df.groupby([df.index.year, df.index.month]).nth([0, 3, -1])

You may also use slices or lists of slices.

.. ipython:: python

df.groupby([df.index.year, df.index.month]).nth[1:]
df.groupby([df.index.year, df.index.month]).nth[1:, :-1]

Enumerate group items
~~~~~~~~~~~~~~~~~~~~~


10 changes: 6 additions & 4 deletions doc/source/whatsnew/v0.15.2.rst
@@ -154,11 +154,13 @@ Other enhancements:

- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`):

.. ipython:: python
:okwarning:
.. code-block:: python

s = pd.Series([False, True, False], index=[0, 0, 1])
s.any(level=0)
>>> s = pd.Series([False, True, False], index=[0, 0, 1])
>>> s.any(level=0)
0 True
1 False
dtype: bool
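
The ``level`` argument shown in this historical example has since been removed from ``Series``/``DataFrame`` reductions (enforced in this commit range by f52331f, #49611), which is presumably why the example above is converted to a static code block. A sketch of the equivalent under the current API, grouping on the index level explicitly (not part of the diff):

    import pandas as pd

    s = pd.Series([False, True, False], index=[0, 0, 1])
    print(s.groupby(level=0).any())   # replaces the removed s.any(level=0)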

- ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`):


19 changes: 12 additions & 7 deletions doc/source/whatsnew/v1.2.0.rst
@@ -383,12 +383,17 @@ this pathological behavior (:issue:`37827`):

*New behavior*:

.. ipython:: python
:okwarning:
.. code-block:: ipython

df.mean()
In [3]: df.mean()
Out[3]:
A 1.0
dtype: float64

df[["A"]].mean()
In [4]: df[["A"]].mean()
Out[4]:
A 1.0
dtype: float64

Moreover, DataFrame reductions with ``numeric_only=None`` will now be
consistent with their Series counterparts. In particular, for
@@ -415,10 +420,10 @@ instead of casting to a NumPy array which may have different semantics (:issue:`

*New behavior*:

.. ipython:: python
:okwarning:
.. code-block:: ipython

df.any()
In [5]: df.any()
Out[5]: Series([], dtype: bool)


.. _whatsnew_120.api_breaking.python:

2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.5.2.rst
@@ -13,9 +13,11 @@ including other versions of pandas.

Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed regression in :meth:`MultiIndex.join` for extension array dtypes (:issue:`49277`)
- Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`)
- Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance
from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`)
- Fixed regression in :func:`date_range` returning an invalid set of periods for ``CustomBusinessDay`` frequency and ``start`` date with timezone (:issue:`49441`)
-

.. ---------------------------------------------------------------------------