pandas-dev
diff --git a/‎.gitignore
Lines changed: 4 additions & 2 deletions b/‎.gitignore
Lines changed: 4 additions & 2 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 5 additions & 5 deletions b/‎.pre-commit-config.yaml
Lines changed: 5 additions & 5 deletions
diff --git a/‎doc/source/user_guide/categorical.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/user_guide/categorical.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v0.23.0.rst
Lines changed: 26 additions & 5 deletions b/‎doc/source/whatsnew/v0.23.0.rst
Lines changed: 26 additions & 5 deletions
diff --git a/‎doc/source/whatsnew/v2.1.2.rst
Lines changed: 0 additions & 1 deletion b/‎doc/source/whatsnew/v2.1.2.rst
Lines changed: 0 additions & 1 deletion
diff --git a/‎doc/source/whatsnew/v2.1.4.rst
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/v2.1.4.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/whatsnew/v2.2.0.rst
Lines changed: 2 additions & 0 deletions b/‎doc/source/whatsnew/v2.2.0.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/_libs/include/pandas/parser/tokenizer.h
Lines changed: 0 additions & 3 deletions b/‎pandas/_libs/include/pandas/parser/tokenizer.h
Lines changed: 0 additions & 3 deletions
diff --git a/‎pandas/_libs/include/pandas/vendored/ujson/python/version.h
Lines changed: 0 additions & 41 deletions b/‎pandas/_libs/include/pandas/vendored/ujson/python/version.h
Lines changed: 0 additions & 41 deletions
diff --git a/‎pandas/_libs/src/parser/pd_parser.c
Lines changed: 1 addition & 0 deletions b/‎pandas/_libs/src/parser/pd_parser.c
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/_libs/src/parser/tokenizer.c
Lines changed: 2 additions & 1 deletion b/‎pandas/_libs/src/parser/tokenizer.c
Lines changed: 2 additions & 1 deletion
diff --git a/‎pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
Lines changed: 1 addition & 2 deletions b/‎pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
Lines changed: 1 addition & 2 deletions
diff --git a/‎pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c
Lines changed: 1 addition & 2 deletions b/‎pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c
Lines changed: 1 addition & 2 deletions
diff --git a/‎pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c
Lines changed: 0 additions & 1 deletion b/‎pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c
Lines changed: 0 additions & 1 deletion
diff --git a/‎pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
Lines changed: 0 additions & 2 deletions b/‎pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
Lines changed: 0 additions & 2 deletions
diff --git a/‎pandas/_libs/src/vendored/ujson/python/objToJSON.c
Lines changed: 0 additions & 1 deletion b/‎pandas/_libs/src/vendored/ujson/python/objToJSON.c
Lines changed: 0 additions & 1 deletion
diff --git a/‎pandas/_libs/src/vendored/ujson/python/ujson.c
Lines changed: 0 additions & 1 deletion b/‎pandas/_libs/src/vendored/ujson/python/ujson.c
Lines changed: 0 additions & 1 deletion
diff --git a/‎pandas/_testing/__init__.py
Lines changed: 0 additions & 14 deletions b/‎pandas/_testing/__init__.py
Lines changed: 0 additions & 14 deletions
diff --git a/‎pandas/conftest.py
Lines changed: 5 additions & 3 deletions b/‎pandas/conftest.py
Lines changed: 5 additions & 3 deletions
diff --git a/‎pandas/core/apply.py
Lines changed: 11 additions & 0 deletions b/‎pandas/core/apply.py
Lines changed: 11 additions & 0 deletions
diff --git a/‎pandas/core/arrays/period.py
Lines changed: 6 additions & 1 deletion b/‎pandas/core/arrays/period.py
Lines changed: 6 additions & 1 deletion
diff --git a/‎pandas/core/base.py
Lines changed: 1 addition & 1 deletion b/‎pandas/core/base.py
Lines changed: 1 addition & 1 deletion
@@ -39,6 +39,7 @@
 .mesonpy-native-file.ini
 MANIFEST
 compile_commands.json
+debug
 .debug
 
 # Python files #
@@ -104,10 +105,11 @@ scikits
 # Generated Sources #
 #####################
 !skts.c
-!np_datetime.c
-!np_datetime_strings.c
 *.c
 *.cpp
+!pandas/_libs/src/**/*.c
+!pandas/_libs/src/**/*.h
+!pandas/_libs/include/**/*.h
 
 # Unit / Performance Testing #
 ##############################
 
@@ -20,11 +20,11 @@ ci:
 repos:
 -   repo: https://github.com/hauntsaninja/black-pre-commit-mirror
     # black compiled with mypyc
-    rev: 23.10.1
+    rev: 23.11.0
     hooks:
       - id: black
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.4
+    rev: v0.1.6
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -47,7 +47,7 @@ repos:
         types_or: [python, rst, markdown, cython, c]
         additional_dependencies: [tomli]
 -   repo: https://github.com/MarcoGorelli/cython-lint
-    rev: v0.15.0
+    rev: v0.16.0
     hooks:
     -   id: cython-lint
     -   id: double-quote-cython-strings
@@ -111,11 +111,11 @@ repos:
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
 -   repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.8.1
+    rev: v0.9.0
     hooks:
     - id: sphinx-lint
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v17.0.4
+    rev: v17.0.6
     hooks:
     - id: clang-format
       files: ^pandas/_libs/src|^pandas/_libs/include
 
@@ -647,7 +647,7 @@ Pivot tables:
 
     raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]})
-    pd.pivot_table(df, values="values", index=["A", "B"])
+    pd.pivot_table(df, values="values", index=["A", "B"], observed=False)
 
 Data munging
 ------------
 
@@ -286,12 +286,33 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna``
    df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
    df
 
-.. ipython:: python
 
-   pd.pivot_table(df, values='values', index=['A', 'B'],
-                  dropna=True)
-   pd.pivot_table(df, values='values', index=['A', 'B'],
-                  dropna=False)
+.. code-block:: ipython
+
+    In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True)
+
+    Out[1]:
+         values
+    A B
+    a c     1.0
+      d     2.0
+    b c     3.0
+      d     4.0
+
+    In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False)
+
+    Out[2]:
+         values
+    A B
+    a c     1.0
+      d     2.0
+      y     NaN
+    b c     3.0
+      d     4.0
+      y     NaN
+    z c     NaN
+      d     NaN
+      y     NaN
 
 
 .. _whatsnew_0230.enhancements.window_raw:
 
@@ -38,7 +38,6 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Fixed bug in :class:`.DataFrameGroupBy` reductions not preserving object dtype when ``infer_string`` is set (:issue:`55620`)
-- Fixed bug in :meth:`.DataFrameGroupBy.min()` and :meth:`.DataFrameGroupBy.max()` not preserving extension dtype for empty object (:issue:`55619`)
 - Fixed bug in :meth:`.SeriesGroupBy.value_counts` returning incorrect dtype for string columns (:issue:`55627`)
 - Fixed bug in :meth:`Categorical.equals` if other has arrow backed string dtype (:issue:`55364`)
 - Fixed bug in :meth:`DataFrame.__setitem__` not inferring string dtype for zero-dimensional array with ``infer_string=True`` (:issue:`55366`)
 
@@ -24,6 +24,7 @@ Bug fixes
 - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`)
 - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
 - Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`)
+- Fixed bug in :meth:`.DataFrameGroupBy.min()` and :meth:`.DataFrameGroupBy.max()` not preserving extension dtype for empty object (:issue:`55619`)
 - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
 - Fixed bug in :meth:`DataFrame.to_hdf` raising when columns have ``StringDtype`` (:issue:`55088`)
 - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
 
@@ -226,6 +226,7 @@ Other enhancements
 - Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
 - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
 - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
+- Improved error message that appears in :meth:`DatetimeIndex.to_period` with frequencies which are not supported as period frequencies, such as "BMS" (:issue:`56243`)
 - Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`)
 
 .. ---------------------------------------------------------------------------
@@ -434,6 +435,7 @@ Other Deprecations
 - Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
 - Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
 - Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
+- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; will be ``True`` in a future version (:issue:`56236`)
 - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
 - Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
 - Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
 
@@ -18,11 +18,8 @@ See LICENSE for the license
 #define ERROR_OVERFLOW 2
 #define ERROR_INVALID_CHARS 3
 
-#include "pandas/portable.h"
 #include <stdint.h>
 
-#include "pandas/vendored/klib/khash.h"
-
 #define STREAM_INIT_SIZE 32
 
 #define REACHED_EOF 1
 
@@ -10,6 +10,7 @@ Distributed under the terms of the BSD Simplified License.
 
 #include "pandas/parser/pd_parser.h"
 #include "pandas/parser/io.h"
+#include "pandas/portable.h"
 
 static int to_double(char *item, double *p_value, char sci, char decimal,
                      int *maybe_int) {
 
@@ -16,15 +16,16 @@ Python's built-in csv module and Warren Weckesser's textreader project on
 GitHub. See Python Software Foundation License and BSD licenses for these.
 
 */
-
 #include "pandas/parser/tokenizer.h"
+#include "pandas/portable.h"
 
 #include <ctype.h>
 #include <float.h>
 #include <math.h>
 #include <stdbool.h>
 
 #include "pandas/portable.h"
+#include "pandas/vendored/klib/khash.h" // for kh_int64_t, kh_destroy_int64
 
 void coliter_setup(coliter_t *self, parser_t *parser, int64_t i,
                    int64_t start) {
 
@@ -25,9 +25,8 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
 #include <Python.h>
 
 #include "pandas/vendored/numpy/datetime/np_datetime.h"
-#include <numpy/arrayobject.h>
-#include <numpy/arrayscalars.h>
 #include <numpy/ndarraytypes.h>
+#include <numpy/npy_common.h>
 
 #if defined(_WIN32)
 #ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS
 
@@ -32,9 +32,8 @@ This file implements string parsing and creation for NumPy datetime.
 
 #include <time.h>
 
-#include <numpy/arrayobject.h>
-#include <numpy/arrayscalars.h>
 #include <numpy/ndarraytypes.h>
+#include <numpy/npy_common.h>
 
 #include "pandas/vendored/numpy/datetime/np_datetime.h"
 #include "pandas/vendored/numpy/datetime/np_datetime_strings.h"
 
@@ -41,7 +41,6 @@ Numeric decoder derived from TCL library
 // Licence at LICENSES/ULTRAJSON_LICENSE
 
 #include "pandas/vendored/ujson/lib/ultrajson.h"
-#include <assert.h>
 #include <errno.h>
 #include <limits.h>
 #include <locale.h>
 
@@ -41,8 +41,6 @@ Numeric decoder derived from TCL library
 // Licence at LICENSES/ULTRAJSON_LICENSE
 
 #include "pandas/vendored/ujson/lib/ultrajson.h"
-#include <assert.h>
-#include <float.h>
 #include <locale.h>
 #include <math.h>
 #include <stdint.h>
 
@@ -40,7 +40,6 @@ Numeric decoder derived from TCL library
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
-#include <math.h>
 
 #define NO_IMPORT_ARRAY
 #define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY
 
@@ -38,7 +38,6 @@ Numeric decoder derived from TCL library
 
 // Licence at LICENSES/ULTRAJSON_LICENSE
 
-#include "pandas/vendored/ujson/python/version.h"
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 #define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY
 
@@ -101,14 +101,11 @@
 if TYPE_CHECKING:
     from pandas._typing import (
         Dtype,
-        Frequency,
         NpDtype,
     )
 
     from pandas.core.arrays import ArrowExtensionArray
 
-_N = 30
-
 UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"]
 UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
 SIGNED_INT_NUMPY_DTYPES: list[NpDtype] = [int, "int8", "int16", "int32", "int64"]
@@ -339,16 +336,6 @@ def to_array(obj):
 # Others
 
 
-def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series:
-    if nper is None:
-        nper = _N
-    return Series(
-        np.random.default_rng(2).standard_normal(nper),
-        index=date_range("2000-01-01", periods=nper, freq=freq),
-        name=name,
-    )
-
-
 def makeCustomIndex(
     nentries,
     nlevels,
@@ -883,7 +870,6 @@ def shares_memory(left, right) -> bool:
     "loc",
     "makeCustomDataframe",
     "makeCustomIndex",
-    "makeTimeSeries",
     "maybe_produces_warning",
     "NARROW_NP_DTYPES",
     "NP_NAT_OBJECTS",
 
@@ -766,9 +766,11 @@ def datetime_series() -> Series:
     """
     Fixture for Series of floats with DatetimeIndex
     """
-    s = tm.makeTimeSeries()
-    s.name = "ts"
-    return s
+    return Series(
+        np.random.default_rng(2).standard_normal(30),
+        index=date_range("2000-01-01", periods=30, freq="B"),
+        name="ts",
+    )
 
 
 def _create_series(index):
 
@@ -20,6 +20,7 @@
 from pandas._config import option_context
 
 from pandas._libs import lib
+from pandas._libs.internals import BlockValuesRefs
 from pandas._typing import (
     AggFuncType,
     AggFuncTypeBase,
@@ -1254,6 +1255,8 @@ def series_generator(self) -> Generator[Series, None, None]:
         ser = self.obj._ixs(0, axis=0)
         mgr = ser._mgr
 
+        is_view = mgr.blocks[0].refs.has_reference()  # type: ignore[union-attr]
+
         if isinstance(ser.dtype, ExtensionDtype):
             # values will be incorrect for this block
             # TODO(EA2D): special case would be unnecessary with 2D EAs
@@ -1267,6 +1270,14 @@ def series_generator(self) -> Generator[Series, None, None]:
                 ser._mgr = mgr
                 mgr.set_values(arr)
                 object.__setattr__(ser, "_name", name)
+                if not is_view:
+                    # In apply_series_generator we store a shallow copy of the
+                    # result, which potentially increases the ref count of this reused
+                    # `ser` object (depending on the result of the applied function)
+                    # -> if that happened and `ser` is already a copy, then we reset
+                    # the refs here to avoid triggering an unnecessary CoW inside the
+                    # applied function (https://github.com/pandas-dev/pandas/pull/56212)
+                    mgr.blocks[0].refs = BlockValuesRefs(mgr.blocks[0])  # type: ignore[union-attr]
                 yield ser
 
     @staticmethod
 
@@ -1174,7 +1174,12 @@ def dt64arr_to_periodarr(
 
     reso = get_unit_from_dtype(data.dtype)
     freq = Period._maybe_convert_freq(freq)
-    base = freq._period_dtype_code
+    try:
+        base = freq._period_dtype_code
+    except (AttributeError, TypeError):
+        # AttributeError: _period_dtype_code might not exist
+        # TypeError: _period_dtype_code might intentionally raise
+        raise TypeError(f"{freq.name} is not supported as period frequency")
     return c_dt64arr_to_periodarr(data.view("i8"), base, tz, reso=reso), freq
 
 
 
@@ -108,7 +108,7 @@ class PandasObject(DirNamesMixin):
     @property
     def _constructor(self):
         """
-        Class constructor (for this class it's just `__class__`.
+        Class constructor (for this class it's just `__class__`).
         """
         return type(self)