pandas-dev
diff --git a/‎.github/workflows/wheels.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/wheels.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎ci/test_wheels.py
Lines changed: 0 additions & 2 deletions b/‎ci/test_wheels.py
Lines changed: 0 additions & 2 deletions
diff --git a/‎ci/test_wheels_windows.bat
Lines changed: 2 additions & 2 deletions b/‎ci/test_wheels_windows.bat
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/source/development/contributing_codebase.rst
Lines changed: 2 additions & 1 deletion b/‎doc/source/development/contributing_codebase.rst
Lines changed: 2 additions & 1 deletion
diff --git a/‎doc/source/whatsnew/v2.1.0.rst
Lines changed: 7 additions & 0 deletions b/‎doc/source/whatsnew/v2.1.0.rst
Lines changed: 7 additions & 0 deletions
diff --git a/‎pandas/_libs/lib.pyx
Lines changed: 2 additions & 1 deletion b/‎pandas/_libs/lib.pyx
Lines changed: 2 additions & 1 deletion
diff --git a/‎pandas/_libs/tslibs/offsets.pyx
Lines changed: 9 additions & 15 deletions b/‎pandas/_libs/tslibs/offsets.pyx
Lines changed: 9 additions & 15 deletions
diff --git a/‎pandas/_testing/__init__.py
Lines changed: 17 additions & 0 deletions b/‎pandas/_testing/__init__.py
Lines changed: 17 additions & 0 deletions
diff --git a/‎pandas/conftest.py
Lines changed: 30 additions & 7 deletions b/‎pandas/conftest.py
Lines changed: 30 additions & 7 deletions
diff --git a/‎pandas/core/arrays/arrow/array.py
Lines changed: 4 additions & 1 deletion b/‎pandas/core/arrays/arrow/array.py
Lines changed: 4 additions & 1 deletion
diff --git a/‎pandas/core/arrays/categorical.py
Lines changed: 10 additions & 2 deletions b/‎pandas/core/arrays/categorical.py
Lines changed: 10 additions & 2 deletions
diff --git a/‎pandas/core/arrays/datetimelike.py
Lines changed: 9 additions & 2 deletions b/‎pandas/core/arrays/datetimelike.py
Lines changed: 9 additions & 2 deletions
diff --git a/‎pandas/core/frame.py
Lines changed: 2 additions & 2 deletions b/‎pandas/core/frame.py
Lines changed: 2 additions & 2 deletions
@@ -173,8 +173,8 @@ jobs:
           pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
           cd .. # Not a good idea to test within the src tree
           python -c "import pandas; print(pandas.__version__);
-          pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2', '--no-strict-data-files']);
-          pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])"
+          pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']);
+          pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])"
       - uses: actions/upload-artifact@v3
         with:
           name: sdist
 
@@ -41,12 +41,10 @@
     multi_args = [
         "-m not clipboard and not single_cpu and not slow and not network and not db",
         "-n 2",
-        "--no-strict-data-files",
     ]
     pd.test(extra_args=multi_args)
     pd.test(
         extra_args=[
             "-m not clipboard and single_cpu and not slow and not network and not db",
-            "--no-strict-data-files",
         ]
     )
@@ -1,6 +1,6 @@
 set test_command=import pandas as pd; print(pd.__version__); ^
-pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '--no-strict-data-files', '-n=2']); ^
-pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])
+pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']); ^
+pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])
 
 python --version
 pip install pytz six numpy python-dateutil tzdata>=2022.1
 
@@ -812,7 +812,8 @@ install pandas) by typing::
     your installation is probably fine and you can start contributing!
 
 Often it is worth running only a subset of tests first around your changes before running the
-entire suite.
+entire suite (tip: you can use the `pandas-coverage app <https://pandas-coverage.herokuapp.com/>`_
+to find out which tests hit the lines of code you've modified, and then run only those).
 
 The easiest way to do this is with::
 
 
@@ -105,15 +105,19 @@ Deprecations
 ~~~~~~~~~~~~
 - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
+- Deprecated :meth:`.GroupBy.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`)
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
 - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
 - Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
+- Deprecated :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`)
 - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)
+- Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`)
 - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
+- Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
 - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
 -
 
@@ -206,6 +210,7 @@ MultiIndex
 
 I/O
 ^^^
+- Bug in :func:`read_html` where tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
 - :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
 -
 
@@ -231,6 +236,7 @@ Groupby/resample/rolling
   grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
   or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
   the function operated on the whole index rather than each element of the index. (:issue:`51979`)
+- Bug in :meth:`.GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`)
 -
 
 Reshaping
@@ -257,6 +263,7 @@ Styler
 Other
 ^^^^^
 - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
+- Bug in :meth:`Series.memory_usage` where passing ``deep=True`` raised an error for a :class:`Series` of objects, and the returned value was incorrect because it did not take GC corrections into account (:issue:`51858`)
 
 .. ***DO NOT USE THIS SECTION***
 
 
@@ -1,6 +1,7 @@
 from collections import abc
 from decimal import Decimal
 from enum import Enum
+from sys import getsizeof
 from typing import (
     Literal,
     _GenericAlias,
@@ -159,7 +160,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t:
 
     n = len(arr)
     for i in range(n):
-        size += arr[i].__sizeof__()
+        size += getsizeof(arr[i])
     return size
 
 
 
@@ -2546,7 +2546,6 @@ cdef class MonthEnd(MonthOffset):
     DateOffset of one month end.
 
     MonthEnd goes to the next date which is an end of the month.
-    To get the end of the current month pass the parameter n equals 0.
 
     See Also
     --------
@@ -2562,10 +2561,10 @@ cdef class MonthEnd(MonthOffset):
     >>> ts + pd.offsets.MonthEnd()
     Timestamp('2022-02-28 00:00:00')
 
-    If you want to get the end of the current month pass the parameter n equals 0:
+    If you want to get the end of the current month:
 
     >>> ts = pd.Timestamp(2022, 1, 31)
-    >>> ts + pd.offsets.MonthEnd(0)
+    >>> pd.offsets.MonthEnd().rollforward(ts)
     Timestamp('2022-01-31 00:00:00')
     """
     _period_dtype_code = PeriodDtypeCode.M
@@ -2578,7 +2577,6 @@ cdef class MonthBegin(MonthOffset):
     DateOffset of one month at beginning.
 
     MonthBegin goes to the next date which is a start of the month.
-    To get the start of the current month pass the parameter n equals 0.
 
     See Also
     --------
@@ -2594,10 +2592,10 @@ cdef class MonthBegin(MonthOffset):
     >>> ts + pd.offsets.MonthBegin()
     Timestamp('2023-01-01 00:00:00')
 
-    If you want to get the start of the current month pass the parameter n equals 0:
+    If you want to get the start of the current month:
 
     >>> ts = pd.Timestamp(2022, 12, 1)
-    >>> ts + pd.offsets.MonthBegin(0)
+    >>> pd.offsets.MonthBegin().rollback(ts)
     Timestamp('2022-12-01 00:00:00')
     """
     _prefix = "MS"
@@ -2609,7 +2607,6 @@ cdef class BusinessMonthEnd(MonthOffset):
     DateOffset increments between the last business day of the month.
 
     BusinessMonthEnd goes to the next date which is the last business day of the month.
-    To get the last business day of the current month pass the parameter n equals 0.
 
     Examples
     --------
@@ -2621,11 +2618,10 @@ cdef class BusinessMonthEnd(MonthOffset):
     >>> ts + pd.offsets.BMonthEnd()
     Timestamp('2022-12-30 00:00:00')
 
-    If you want to get the end of the current business month
-    pass the parameter n equals 0:
+    If you want to get the end of the current business month:
 
     >>> ts = pd.Timestamp(2022, 11, 30)
-    >>> ts + pd.offsets.BMonthEnd(0)
+    >>> pd.offsets.BMonthEnd().rollforward(ts)
     Timestamp('2022-11-30 00:00:00')
     """
     _prefix = "BM"
@@ -2637,8 +2633,7 @@ cdef class BusinessMonthBegin(MonthOffset):
     DateOffset of one month at the first business day.
 
     BusinessMonthBegin goes to the next date which is the first business day
-    of the month. To get the first business day of the current month pass
-    the parameter n equals 0.
+    of the month.
 
     Examples
     --------
@@ -2650,11 +2645,10 @@ cdef class BusinessMonthBegin(MonthOffset):
     >>> ts + pd.offsets.BMonthBegin()
     Timestamp('2023-01-02 00:00:00')
 
-    If you want to get the start of the current business month pass
-    the parameter n equals 0:
+    If you want to get the start of the current business month:
 
     >>> ts = pd.Timestamp(2022, 12, 1)
-    >>> ts + pd.offsets.BMonthBegin(0)
+    >>> pd.offsets.BMonthBegin().rollback(ts)
     Timestamp('2022-12-01 00:00:00')
     """
     _prefix = "BMS"
 
@@ -177,6 +177,23 @@
     np.uint32,
 ]
 
+PYTHON_DATA_TYPES = [
+    str,
+    int,
+    float,
+    complex,
+    list,
+    tuple,
+    range,
+    dict,
+    set,
+    frozenset,
+    bool,
+    bytes,
+    bytearray,
+    memoryview,
+]
+
 ENDIAN = {"little": "<", "big": ">"}[byteorder]
 
 NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
 
@@ -103,9 +103,9 @@
 
 def pytest_addoption(parser) -> None:
     parser.addoption(
-        "--no-strict-data-files",
-        action="store_false",
-        help="Don't fail if a test is skipped for missing data file.",
+        "--strict-data-files",
+        action="store_true",
+        help="Fail if a test is skipped for missing data file.",
     )
 
 
@@ -760,6 +760,29 @@ def index_or_series_obj(request):
     return _index_or_series_objs[request.param].copy(deep=True)
 
 
+_typ_objects_series = {
+    f"{dtype.__name__}-series": Series(dtype) for dtype in tm.PYTHON_DATA_TYPES
+}
+
+
+_index_or_series_memory_objs = {
+    **indices_dict,
+    **_series,
+    **_narrow_series,
+    **_typ_objects_series,
+}
+
+
+@pytest.fixture(params=_index_or_series_memory_objs.keys())
+def index_or_series_memory_obj(request):
+    """
+    Fixture for tests on indexes, series, series with a narrow dtype and
+    series with empty objects type
+    copy to avoid mutation, e.g. setting .name
+    """
+    return _index_or_series_memory_objs[request.param].copy(deep=True)
+
+
 # ----------------------------------------------------------------
 # DataFrames
 # ----------------------------------------------------------------
@@ -1112,9 +1135,9 @@ def all_numeric_accumulations(request):
 @pytest.fixture
 def strict_data_files(pytestconfig):
     """
-    Returns the configuration for the test setting `--no-strict-data-files`.
+    Returns the configuration for the test setting `--strict-data-files`.
     """
-    return pytestconfig.getoption("--no-strict-data-files")
+    return pytestconfig.getoption("--strict-data-files")
 
 
 @pytest.fixture
@@ -1134,7 +1157,7 @@ def datapath(strict_data_files: str) -> Callable[..., str]:
     Raises
     ------
     ValueError
-        If the path doesn't exist and the --no-strict-data-files option is not set.
+        If the path doesn't exist and the --strict-data-files option is set.
     """
     BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
 
@@ -1143,7 +1166,7 @@ def deco(*args):
         if not os.path.exists(path):
             if strict_data_files:
                 raise ValueError(
-                    f"Could not find file {path} and --no-strict-data-files is not set."
+                    f"Could not find file {path} and --strict-data-files is set."
                 )
             pytest.skip(f"Could not find {path}.")
         return path
 
@@ -2091,7 +2091,10 @@ def _dt_round(
         return self._round_temporally("round", freq, ambiguous, nonexistent)
 
     def _dt_to_pydatetime(self):
-        return np.array(self._pa_array.to_pylist(), dtype=object)
+        data = self._pa_array.to_pylist()
+        if self._dtype.pyarrow_dtype.unit == "ns":
+            data = [ts.to_pydatetime(warn=False) for ts in data]
+        return np.array(data, dtype=object)
 
     def _dt_tz_localize(
         self,
 
@@ -13,7 +13,8 @@
     cast,
     overload,
 )
-from warnings import warn
+import warnings
+
 
 import numpy as np
 
@@ -553,6 +554,13 @@ def to_list(self):
         """
         Alias for tolist.
         """
+        # GH#51254
+        warnings.warn(
+            "Categorical.to_list is deprecated and will be removed in a future "
+            "version. Use obj.tolist() instead",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
         return self.tolist()
 
     @classmethod
@@ -1280,7 +1288,7 @@ def map(
         Index(['first', 'second', nan], dtype='object')
         """
         if na_action is lib.no_default:
-            warn(
+            warnings.warn(
                 "The default value of 'ignore' for the `na_action` parameter in "
                 "pandas.Categorical.map is deprecated and will be "
                 "changed to 'None' in a future version. Please set na_action to the "
 
@@ -957,10 +957,17 @@ def _cmp_method(self, other, op):
                 if not isinstance(other, type(self)):
                     # i.e. Timedelta/Timestamp, cast to ndarray and let
                     #  compare_mismatched_resolutions handle broadcasting
-                    other_arr = np.array(other.asm8)
+                    try:
+                        # GH#52080 see if we can losslessly cast to shared unit
+                        other = other.as_unit(self.unit, round_ok=False)
+                    except ValueError:
+                        other_arr = np.array(other.asm8)
+                        return compare_mismatched_resolutions(
+                            self._ndarray, other_arr, op
+                        )
                 else:
                     other_arr = other._ndarray
-                return compare_mismatched_resolutions(self._ndarray, other_arr, op)
+                    return compare_mismatched_resolutions(self._ndarray, other_arr, op)
 
         other_vals = self._unbox(other)
         # GH#37462 comparison on i8 values is almost 2x faster than M8/m8
 
@@ -5033,7 +5033,7 @@ def align(
         method: FillnaOptions | None | lib.NoDefault = lib.no_default,
         limit: int | None | lib.NoDefault = lib.no_default,
         fill_axis: Axis | lib.NoDefault = lib.no_default,
-        broadcast_axis: Axis | None = None,
+        broadcast_axis: Axis | None | lib.NoDefault = lib.no_default,
     ) -> tuple[Self, NDFrameT]:
         return super().align(
             other,
@@ -11418,7 +11418,7 @@ def asfreq(
     def resample(
         self,
         rule,
-        axis: Axis = 0,
+        axis: Axis | lib.NoDefault = lib.no_default,
         closed: str | None = None,
         label: str | None = None,
         convention: str = "start",
Original file line number	Diff line number	Diff line change
`@@ -41,12 +41,10 @@`
`41`	`41`	`multi_args = [`
`42`	`42`	`"-m not clipboard and not single_cpu and not slow and not network and not db",`
`43`	`43`	`"-n 2",`
`44`		`- "--no-strict-data-files",`
`45`	`44`	`]`
`46`	`45`	`pd.test(extra_args=multi_args)`
`47`	`46`	`pd.test(`
`48`	`47`	`extra_args=[`
`49`	`48`	`"-m not clipboard and single_cpu and not slow and not network and not db",`
`50`		`- "--no-strict-data-files",`
`51`	`49`	`]`
`52`	`50`	`)`