
Commit a5a6691

Merge branch 'main' into feat/pydantic-protocol

2 parents fa54b82 + 431dd6f · commit a5a6691

37 files changed: +365 -246 lines

.pre-commit-config.yaml

Lines changed: 5 additions & 17 deletions

@@ -15,18 +15,11 @@ default_stages: [
 ci:
     autofix_prs: false
 repos:
--   repo: local
+-   repo: https://github.com/hauntsaninja/black-pre-commit-mirror
+    # black compiled with mypyc
+    rev: 23.3.0
     hooks:
-    # NOTE: we make `black` a local hook because if it's installed from
-    # PyPI (rather than from source) then it'll run twice as fast thanks to mypyc
-    -   id: black
-        name: black
-        description: "Black: The uncompromising Python code formatter"
-        entry: black
-        language: python
-        require_serial: true
-        types_or: [python, pyi]
-        additional_dependencies: [black==23.3.0]
+    -   id: black
 -   repo: https://github.com/charliermarsh/ruff-pre-commit
     rev: v0.0.270
     hooks:
@@ -74,7 +67,7 @@ repos:
             --linelength=88,
             '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
             ]
--   repo: https://github.com/pycqa/pylint
+-   repo: https://github.com/pylint-dev/pylint
     rev: v3.0.0a6
     hooks:
     -   id: pylint
@@ -93,11 +86,6 @@ repos:
             |^pandas/conftest\.py # keep excluded
         args: [--disable=all, --enable=redefined-outer-name]
         stages: [manual]
-    -   id: pylint
-        alias: unspecified-encoding
-        name: Using open without explicitly specifying an encoding
-        args: [--disable=all, --enable=unspecified-encoding]
-        stages: [manual]
 -   repo: https://github.com/PyCQA/isort
     rev: 5.12.0
     hooks:

ci/code_checks.sh

Lines changed: 0 additions & 6 deletions

@@ -110,12 +110,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas_object \
         pandas.api.interchange.from_dataframe \
         pandas.DatetimeIndex.snap \
-        pandas.core.window.ewm.ExponentialMovingWindow.mean \
-        pandas.core.window.ewm.ExponentialMovingWindow.sum \
-        pandas.core.window.ewm.ExponentialMovingWindow.std \
-        pandas.core.window.ewm.ExponentialMovingWindow.var \
-        pandas.core.window.ewm.ExponentialMovingWindow.corr \
-        pandas.core.window.ewm.ExponentialMovingWindow.cov \
         pandas.api.indexers.BaseIndexer \
         pandas.api.indexers.VariableOffsetWindowIndexer \
         pandas.io.formats.style.Styler \

doc/source/user_guide/10min.rst

Lines changed: 10 additions & 0 deletions

@@ -16,6 +16,16 @@ Customarily, we import as follows:
    import numpy as np
    import pandas as pd

+Basic data structures in pandas
+-------------------------------
+
+Pandas provides two types of classes for handling data:
+
+1. :class:`Series`: a one-dimensional labeled array holding data of any type
+   such as integers, strings, Python objects etc.
+2. :class:`DataFrame`: a two-dimensional data structure that holds data like
+   a two-dimension array or a table with rows and columns.
+
 Object creation
 ---------------
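The new guide section names the two core containers. As a quick illustration of what it describes (not part of the diff itself), both can be built directly from Python or NumPy data:

```python
import numpy as np
import pandas as pd

# Series: one-dimensional labeled array, holding any dtype
s = pd.Series([1, 3, 5, np.nan, 6, 8])

# DataFrame: two-dimensional, with labeled rows and columns
df = pd.DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"])

print(s.dtype)   # float64
print(df.shape)  # (4, 3)
```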

doc/source/user_guide/io.rst

Lines changed: 7 additions & 8 deletions

@@ -1568,8 +1568,7 @@ class of the csv module. For this, you have to specify ``sep=None``.
 .. ipython:: python

    df = pd.DataFrame(np.random.randn(10, 4))
-   df.to_csv("tmp.csv", sep="|")
-   df.to_csv("tmp2.csv", sep=":")
+   df.to_csv("tmp2.csv", sep=":", index=False)
    pd.read_csv("tmp2.csv", sep=None, engine="python")

 .. ipython:: python
@@ -1597,8 +1596,8 @@ rather than reading the entire file into memory, such as the following:
 .. ipython:: python

    df = pd.DataFrame(np.random.randn(10, 4))
-   df.to_csv("tmp.csv", sep="|")
-   table = pd.read_csv("tmp.csv", sep="|")
+   df.to_csv("tmp.csv", index=False)
+   table = pd.read_csv("tmp.csv")
    table

@@ -1607,8 +1606,8 @@ value will be an iterable object of type ``TextFileReader``:

 .. ipython:: python

-   with pd.read_csv("tmp.csv", sep="|", chunksize=4) as reader:
-       reader
+   with pd.read_csv("tmp.csv", chunksize=4) as reader:
+       print(reader)
        for chunk in reader:
            print(chunk)

@@ -1620,8 +1619,8 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:

 .. ipython:: python

-   with pd.read_csv("tmp.csv", sep="|", iterator=True) as reader:
-       reader.get_chunk(5)
+   with pd.read_csv("tmp.csv", iterator=True) as reader:
+       print(reader.get_chunk(5))

 .. ipython:: python
    :suppress:
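The edited examples drop the unused `sep="|"` and print the reader objects explicitly. A self-contained sketch of the same chunked-reading pattern (the file name `tmp.csv` is just a scratch path):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(10, 4))
df.to_csv("tmp.csv", index=False)

# chunksize returns a TextFileReader that works as a context manager
with pd.read_csv("tmp.csv", chunksize=4) as reader:
    for chunk in reader:
        print(chunk.shape)  # (4, 4), (4, 4), (2, 4)

# iterator=True returns the same object; pull rows manually instead
with pd.read_csv("tmp.csv", iterator=True) as reader:
    print(reader.get_chunk(5))
```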

doc/source/whatsnew/v2.1.0.rst

Lines changed: 9 additions & 0 deletions

@@ -27,6 +27,14 @@ Copy-on-Write improvements
   of those Index objects for the columns of the DataFrame (:issue:`52947`)
 - Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`)

+- Trying to operate inplace on a temporary column selection
+  (for example, ``df["a"].fillna(100, inplace=True)``)
+  will now always raise a warning when Copy-on-Write is enabled. In this mode,
+  operating inplace like this will never work, since the selection behaves
+  as a temporary copy. This holds true for:
+
+  - DataFrame.fillna / Series.fillna
+
 .. _whatsnew_210.enhancements.enhancement2:

 ``map(func, na_action="ignore")`` now works for all array types
@@ -241,6 +249,7 @@ Other API changes
 Deprecations
 ~~~~~~~~~~~~
 - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
+- Deprecated 'downcast' keyword in :meth:`Index.fillna` (:issue:`53956`)
 - Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`)
 - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
 - Deprecated 'quantile' keyword in :meth:`Rolling.quantile` and :meth:`Expanding.quantile`, renamed as 'q' instead (:issue:`52550`)
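To make the new Copy-on-Write note concrete, here is a minimal sketch of the pattern it warns about and the usual replacement (assuming the `mode.copy_on_write` option available since pandas 2.0):

```python
import pandas as pd

pd.set_option("mode.copy_on_write", True)

df = pd.DataFrame({"a": [1.0, None, 3.0], "b": [4, 5, 6]})

# Under Copy-on-Write, df["a"] behaves as a temporary copy, so an inplace
# fillna on it can never reach df and now always warns:
# df["a"].fillna(100, inplace=True)

# Assign back to the parent object instead:
df["a"] = df["a"].fillna(100)
```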

environment.yml

Lines changed: 5 additions & 10 deletions

@@ -17,7 +17,6 @@ dependencies:
   - pytest-cov
   - pytest-xdist>=2.2.0
   - pytest-asyncio>=0.17.0
-  - pytest-localserver>=0.7.1
   - coverage

   # required dependencies
@@ -40,7 +39,7 @@ dependencies:
   - lxml>=4.8.0
   - matplotlib>=3.6.1
   - numba>=0.55.2
-  - numexpr>=2.8.0 # pin for "Run checks on imported code" job
+  - numexpr>=2.8.0
   - openpyxl>=3.0.10
   - odfpy>=1.4.1
   - py
@@ -76,14 +75,10 @@ dependencies:
   - cxx-compiler

   # code checks
-  - black=23.3.0
-  - cpplint
-  - flake8=6.0.0
-  - isort>=5.2.1 # check that imports are in the right order
-  - mypy=1.2
+  - flake8=6.0.0 # run in subprocess over docstring examples
+  - mypy=1.2 # pre-commit uses locally installed mypy
+  - tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
   - pre-commit>=2.15.0
-  - pyupgrade
-  - ruff=0.0.215

   # documentation
   - gitpython # obtain contributors from git for whatsnew
@@ -119,6 +114,6 @@ dependencies:
   - pygments # Code highlighting

   - pip:
-      - sphinx-toggleprompt
+      - sphinx-toggleprompt # conda-forge version has stricter pins on jinja2
       - typing_extensions; python_version<"3.11"
       - tzdata>=2022.1

pandas/_libs/tslibs/parsing.pyx

Lines changed: 1 addition & 1 deletion

@@ -704,7 +704,7 @@ cdef datetime dateutil_parse(
             # we get tzlocal, once the deprecation is enforced will get
             # timezone.utc, not raise.
            warnings.warn(
-                "Parsing '{res.tzname}' as tzlocal (dependent on system timezone) "
+                f"Parsing '{res.tzname}' as tzlocal (dependent on system timezone) "
                 "is deprecated and will raise in a future version. Pass the 'tz' "
                 "keyword or call tz_localize after construction instead",
                 FutureWarning,
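The one-character fix above matters because of implicit string concatenation: only segments carrying the `f` prefix are interpolated, so the old message showed the literal `{res.tzname}`. A standalone illustration (the `_Result` class is made up for the demo):

```python
class _Result:
    tzname = "CET"

res = _Result()

broken = (
    "Parsing '{res.tzname}' as tzlocal "  # no f-prefix: braces stay literal
    "is deprecated"
)
fixed = (
    f"Parsing '{res.tzname}' as tzlocal "  # f-prefix: interpolated at runtime
    "is deprecated"
)

print(broken)  # Parsing '{res.tzname}' as tzlocal is deprecated
print(fixed)   # Parsing 'CET' as tzlocal is deprecated
```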

pandas/api/typing/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -2,6 +2,9 @@
 Public API classes that store intermediate results useful for type-hinting.
 """

+from pandas._libs import NaTType
+from pandas._libs.missing import NAType
+
 from pandas.core.groupby import (
     DataFrameGroupBy,
     SeriesGroupBy,
@@ -36,6 +39,8 @@
     "ExponentialMovingWindow",
     "ExponentialMovingWindowGroupby",
     "JsonReader",
+    "NaTType",
+    "NAType",
     "PeriodIndexResamplerGroupby",
     "Resampler",
     "Rolling",

pandas/core/arrays/arrow/array.py

Lines changed: 0 additions & 2 deletions

@@ -2026,8 +2026,6 @@ def _str_repeat(self, repeats: int | Sequence[int]):
             raise NotImplementedError(
                 f"repeat is not implemented when repeats is {type(repeats).__name__}"
             )
-        elif pa_version_under7p0:
-            raise NotImplementedError("repeat is not implemented for pyarrow < 7")
         else:
             return type(self)(pc.binary_repeat(self._pa_array, repeats))

pandas/core/arrays/datetimelike.py

Lines changed: 9 additions & 1 deletion

@@ -2211,7 +2211,15 @@ def factorize(
             codes = codes[::-1]
             uniques = uniques[::-1]
             return codes, uniques
-        # FIXME: shouldn't get here; we are ignoring sort
+
+        if sort:
+            # algorithms.factorize only passes sort=True here when freq is
+            # not None, so this should not be reached.
+            raise NotImplementedError(
+                f"The 'sort' keyword in {type(self).__name__}.factorize is "
+                "ignored unless arr.freq is not None. To factorize with sort, "
+                "call pd.factorize(obj, sort=True) instead."
+            )
         return super().factorize(use_na_sentinel=use_na_sentinel)

     @classmethod
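The replaced FIXME becomes an explicit error whose message points at the top-level function, which does honor sorting for datetime-like data. A small sketch of the suggested call:

```python
import pandas as pd

dti = pd.DatetimeIndex(["2023-03-01", "2023-01-01", "2023-02-01"])

# pd.factorize handles sort=True itself, regardless of freq
codes, uniques = pd.factorize(dti, sort=True)
print(codes)    # [2 0 1]
print(uniques)  # DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'], ...)
```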

pandas/core/arrays/numpy_.py

Lines changed: 1 addition & 1 deletion

@@ -247,7 +247,7 @@ def pad_or_backfill(

         meth = missing.clean_fill_method(method)
         missing.pad_or_backfill_inplace(
-            out_data,
+            out_data.T,
             method=meth,
             axis=0,
             limit=limit,

pandas/core/arrays/string_arrow.py

Lines changed: 16 additions & 13 deletions

@@ -307,28 +307,31 @@ def _str_contains(
             return super()._str_contains(pat, case, flags, na, regex)

         if regex:
-            if case is False:
-                fallback_performancewarning()
-                return super()._str_contains(pat, case, flags, na, regex)
-            else:
-                result = pc.match_substring_regex(self._pa_array, pat)
+            result = pc.match_substring_regex(self._pa_array, pat, ignore_case=not case)
         else:
-            if case:
-                result = pc.match_substring(self._pa_array, pat)
-            else:
-                result = pc.match_substring(pc.utf8_upper(self._pa_array), pat.upper())
+            result = pc.match_substring(self._pa_array, pat, ignore_case=not case)
         result = BooleanDtype().__from_arrow__(result)
         if not isna(na):
             result[isna(result)] = bool(na)
         return result

     def _str_startswith(self, pat: str, na=None):
-        pat = f"^{re.escape(pat)}"
-        return self._str_contains(pat, na=na, regex=True)
+        result = pc.starts_with(self._pa_array, pattern=pat)
+        if not isna(na):
+            result = result.fill_null(na)
+        result = BooleanDtype().__from_arrow__(result)
+        if not isna(na):
+            result[isna(result)] = bool(na)
+        return result

     def _str_endswith(self, pat: str, na=None):
-        pat = f"{re.escape(pat)}$"
-        return self._str_contains(pat, na=na, regex=True)
+        result = pc.ends_with(self._pa_array, pattern=pat)
+        if not isna(na):
+            result = result.fill_null(na)
+        result = BooleanDtype().__from_arrow__(result)
+        if not isna(na):
+            result[isna(result)] = bool(na)
+        return result

     def _str_replace(
         self,
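The rewritten methods call the pyarrow.compute kernels directly instead of routing prefix/suffix checks through a regex-based `_str_contains`. A minimal standalone sketch of those kernels (assumes a pyarrow version that supports `ignore_case`):

```python
import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array(["Apple", "banana", None, "apricot"])

# Case handling now goes through ignore_case, as in _str_contains
print(pc.match_substring(arr, "AP", ignore_case=True))
print(pc.match_substring_regex(arr, "^a", ignore_case=True))

# Prefix/suffix checks, as in _str_startswith / _str_endswith;
# nulls propagate unless filled explicitly
print(pc.starts_with(arr, pattern="ap"))
print(pc.ends_with(arr, pattern="ot").fill_null(False))
```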

pandas/core/frame.py

Lines changed: 4 additions & 25 deletions

@@ -961,13 +961,6 @@ def _is_homogeneous_type(self) -> bool:
         -------
         bool

-        See Also
-        --------
-        Index._is_homogeneous_type : Whether the object has a single
-            dtype.
-        MultiIndex._is_homogeneous_type : Whether all the levels of a
-            MultiIndex have the same dtype.
-
         Examples
         --------
         >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous_type
@@ -983,12 +976,8 @@ def _is_homogeneous_type(self) -> bool:
         ...                "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type
         False
         """
-        if isinstance(self._mgr, ArrayManager):
-            return len({arr.dtype for arr in self._mgr.arrays}) == 1
-        if self._mgr.any_extension_types:
-            return len({block.dtype for block in self._mgr.blocks}) == 1
-        else:
-            return not self._is_mixed_type
+        # The "<" part of "<=" here is for empty DataFrame cases
+        return len({arr.dtype for arr in self._mgr.arrays}) <= 1

     @property
     def _can_fast_transpose(self) -> bool:
@@ -4958,7 +4947,7 @@ def _reindex_multi(
         if row_indexer is not None and col_indexer is not None:
             # Fastpath. By doing two 'take's at once we avoid making an
             # unnecessary copy.
-            # We only get here with `not self._is_mixed_type`, which (almost)
+            # We only get here with `self._can_fast_transpose`, which (almost)
             # ensures that self.values is cheap. It may be worth making this
             # condition more specific.
             indexer = row_indexer, col_indexer
@@ -10849,17 +10838,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
         if len(frame._get_axis(axis)) == 0:
             result = self._constructor_sliced(0, index=frame._get_agg_axis(axis))
         else:
-            if frame._is_mixed_type or frame._mgr.any_extension_types:
-                # the or any_extension_types is really only hit for single-
-                # column frames with an extension array
-                result = notna(frame).sum(axis=axis)
-            else:
-                # GH13407
-                series_counts = notna(frame).sum(axis=axis)
-                counts = series_counts._values
-                result = self._constructor_sliced(
-                    counts, index=frame._get_agg_axis(axis), copy=False
-                )
+            result = notna(frame).sum(axis=axis)

         return result.astype("int64").__finalize__(self, method="count")