Skip to content

Commit a671eb7

Browse files
committed
Merge branch 'main' of https://github.com/pandas-dev/pandas
2 parents 65de448 + b4d851c commit a671eb7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+530
-334
lines changed

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ jobs:
153153
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
154154

155155
- name: Build wheels
156-
uses: pypa/cibuildwheel@v2.23.1
156+
uses: pypa/cibuildwheel@v2.23.2
157157
with:
158158
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
159159
env:

.pre-commit-config.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
minimum_pre_commit_version: 2.15.0
1+
minimum_pre_commit_version: 4.0.0
22
exclude: ^LICENSES/|\.(html|csv|svg)$
33
# reserve "manual" for relatively slow hooks which we still want to run in CI
44
default_stages: [
@@ -19,13 +19,13 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.9.9
22+
rev: v0.11.4
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
2626
exclude: ^pandas/tests/frame/test_query_eval.py
2727
- id: ruff
28-
# TODO: remove autofixe-only rules when they are checked by ruff
28+
# TODO: remove autofix-only rules when they are checked by ruff
2929
name: ruff-selected-autofixes
3030
alias: ruff-selected-autofixes
3131
files: ^pandas
@@ -34,7 +34,7 @@ repos:
3434
- id: ruff-format
3535
exclude: ^scripts|^pandas/tests/frame/test_query_eval.py
3636
- repo: https://github.com/jendrikseipp/vulture
37-
rev: 'v2.14'
37+
rev: v2.14
3838
hooks:
3939
- id: vulture
4040
entry: python scripts/run_vulture.py
@@ -95,14 +95,14 @@ repos:
9595
- id: sphinx-lint
9696
args: ["--enable", "all", "--disable", "line-too-long"]
9797
- repo: https://github.com/pre-commit/mirrors-clang-format
98-
rev: v19.1.7
98+
rev: v20.1.0
9999
hooks:
100100
- id: clang-format
101101
files: ^pandas/_libs/src|^pandas/_libs/include
102102
args: [-i]
103103
types_or: [c, c++]
104104
- repo: https://github.com/trim21/pre-commit-mirror-meson
105-
rev: v1.7.0
105+
rev: v1.7.2
106106
hooks:
107107
- id: meson-fmt
108108
args: ['--inplace']

asv_bench/benchmarks/frame_methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ def setup(self):
517517
self.df = DataFrame(np.random.randn(1000, 100))
518518

519519
self.s = Series(np.arange(1028.0))
520-
self.df2 = DataFrame({i: self.s for i in range(1028)})
520+
self.df2 = DataFrame(dict.fromkeys(range(1028), self.s))
521521
self.df3 = DataFrame(np.random.randn(1000, 3), columns=list("ABC"))
522522

523523
def time_apply_user_func(self):

asv_bench/benchmarks/indexing_engines.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,14 @@ class NumericEngineIndexing:
6767
def setup(self, engine_and_dtype, index_type, unique, N):
6868
engine, dtype = engine_and_dtype
6969

70+
if (
71+
index_type == "non_monotonic"
72+
and dtype in [np.int16, np.int8, np.uint8]
73+
and unique
74+
):
75+
# Values overflow
76+
raise NotImplementedError
77+
7078
if index_type == "monotonic_incr":
7179
if unique:
7280
arr = np.arange(N * 3, dtype=dtype)
@@ -115,6 +123,14 @@ def setup(self, engine_and_dtype, index_type, unique, N):
115123
engine, dtype = engine_and_dtype
116124
dtype = dtype.lower()
117125

126+
if (
127+
index_type == "non_monotonic"
128+
and dtype in ["int16", "int8", "uint8"]
129+
and unique
130+
):
131+
# Values overflow
132+
raise NotImplementedError
133+
118134
if index_type == "monotonic_incr":
119135
if unique:
120136
arr = np.arange(N * 3, dtype=dtype)

ci/code_checks.sh

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
75-
-i "pandas.Timestamp.max PR02" \
76-
-i "pandas.Timestamp.min PR02" \
77-
-i "pandas.Timestamp.resolution PR02" \
7875
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
7976
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
8077
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \

doc/source/development/debugging_extensions.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ By default building pandas from source will generate a release build. To generat
2323

2424
.. note::
2525

26-
conda environments update CFLAGS/CPPFLAGS with flags that are geared towards generating releases. If using conda, you may need to set ``CFLAGS="$CFLAGS -O0"`` and ``CPPFLAGS="$CPPFLAGS -O0"`` to ensure optimizations are turned off for debugging
26+
conda environments update CFLAGS/CPPFLAGS with flags that are geared towards generating releases, which may work against usage in a development environment. If using conda, you should clear these environment variables via ``export CFLAGS=`` and ``export CPPFLAGS=``.
2727

2828
By specifying ``builddir="debug"`` all of the targets will be built and placed in the debug directory relative to the project root. This helps to keep your debug and release artifacts separate; you are of course able to choose a different directory name or omit altogether if you do not care to separate build types.
2929

doc/source/getting_started/comparison/comparison_with_r.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ In Python, since ``a`` is a list, you can simply use list comprehension.
383383

384384
.. ipython:: python
385385
386-
a = np.array(list(range(1, 24)) + [np.NAN]).reshape(2, 3, 4)
386+
a = np.array(list(range(1, 24)) + [np.nan]).reshape(2, 3, 4)
387387
pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)])
388388
389389
meltlist
@@ -402,7 +402,7 @@ In Python, this list would be a list of tuples, so
402402

403403
.. ipython:: python
404404
405-
a = list(enumerate(list(range(1, 5)) + [np.NAN]))
405+
a = list(enumerate(list(range(1, 5)) + [np.nan]))
406406
pd.DataFrame(a)
407407
408408
For more details and examples see :ref:`the Intro to Data Structures

doc/source/user_guide/basics.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2064,12 +2064,12 @@ different numeric dtypes will **NOT** be combined. The following example will gi
20642064

20652065
.. ipython:: python
20662066
2067-
df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float32")
2067+
df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float64")
20682068
df1
20692069
df1.dtypes
20702070
df2 = pd.DataFrame(
20712071
{
2072-
"A": pd.Series(np.random.randn(8), dtype="float16"),
2072+
"A": pd.Series(np.random.randn(8), dtype="float32"),
20732073
"B": pd.Series(np.random.randn(8)),
20742074
"C": pd.Series(np.random.randint(0, 255, size=8), dtype="uint8"), # [0,255] (range of uint8)
20752075
}

doc/source/user_guide/enhancingperf.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ can be improved by passing an ``np.ndarray``.
171171
In [4]: %%cython
172172
...: cimport numpy as np
173173
...: import numpy as np
174+
...: np.import_array()
174175
...: cdef double f_typed(double x) except? -2:
175176
...: return x * (x - 1)
176177
...: cpdef double integrate_f_typed(double a, double b, int N):
@@ -225,6 +226,7 @@ and ``wraparound`` checks can yield more performance.
225226
...: cimport cython
226227
...: cimport numpy as np
227228
...: import numpy as np
229+
...: np.import_array()
228230
...: cdef np.float64_t f_typed(np.float64_t x) except? -2:
229231
...: return x * (x - 1)
230232
...: cpdef np.float64_t integrate_f_typed(np.float64_t a, np.float64_t b, np.int64_t N):

doc/source/whatsnew/v0.11.0.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,10 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe
7474

7575
.. ipython:: python
7676
77-
df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')
77+
df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float64')
7878
df1
7979
df1.dtypes
80-
df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float16'),
80+
df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float32'),
8181
'B': pd.Series(np.random.randn(8)),
8282
'C': pd.Series(range(8), dtype='uint8')})
8383
df2

doc/source/whatsnew/v3.0.0.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Other enhancements
6868
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
6969
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
7070
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
71+
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
7172
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
7273
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
7374
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
@@ -762,6 +763,7 @@ Plotting
762763
- Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`)
763764
- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when both color and a ``dict`` style are set (:issue:`59461`)
764765
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
766+
- Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`)
765767
- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)
766768

767769
Groupby/resample/rolling
@@ -773,6 +775,7 @@ Groupby/resample/rolling
773775
- Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
774776
- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
775777
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
778+
- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` not keeping the index name when the index had :class:`ArrowDtype` timestamp dtype (:issue:`61222`)
776779
- Bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`)
777780
- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
778781
- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`)
@@ -825,6 +828,7 @@ Other
825828
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
826829
- Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`)
827830
- Bug in :func:`eval` where expressions on :class:`ExtensionArray` that include division ``/`` failed with a ``TypeError``. (:issue:`58748`)
831+
- Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`)
828832
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
829833
- Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`)
830834
- Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`)
@@ -834,6 +838,7 @@ Other
834838
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow use of the ``tan`` function. (:issue:`55091`)
835839
- Bug in :meth:`DataFrame.query` where using duplicate column names led to a ``TypeError``. (:issue:`59950`)
836840
- Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`)
841+
- Bug in :meth:`DataFrame.query` which raised an exception when querying integer column names using backticks. (:issue:`60494`)
837842
- Bug in :meth:`DataFrame.shift` where passing a ``freq`` on a DataFrame with no columns did not shift the index correctly. (:issue:`60102`)
838843
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
839844
- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)

environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ dependencies:
2323

2424
# required dependencies
2525
- python-dateutil
26-
- numpy<2
26+
- numpy<3
2727

2828
# optional dependencies
2929
- beautifulsoup4>=4.11.2
@@ -80,7 +80,7 @@ dependencies:
8080
- flake8=7.1.0 # run in subprocess over docstring examples
8181
- mypy=1.13.0 # pre-commit uses locally installed mypy
8282
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
83-
- pre-commit>=4.0.1
83+
- pre-commit>=4.2.0
8484

8585
# documentation
8686
- gitpython # obtain contributors from git for whatsnew

pandas/_libs/algos.pyx

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -391,10 +391,11 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
391391
# clip `covxy / divisor` to ensure coeff is within bounds
392392
if divisor != 0:
393393
val = covxy / divisor
394-
if val > 1.0:
395-
val = 1.0
396-
elif val < -1.0:
397-
val = -1.0
394+
if not cov:
395+
if val > 1.0:
396+
val = 1.0
397+
elif val < -1.0:
398+
val = -1.0
398399
result[xi, yi] = result[yi, xi] = val
399400
else:
400401
result[xi, yi] = result[yi, xi] = NaN

pandas/_libs/tslibs/timedeltas.pyi

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ from typing import (
33
ClassVar,
44
Literal,
55
TypeAlias,
6-
TypeVar,
76
overload,
87
)
98

@@ -60,7 +59,6 @@ UnitChoices: TypeAlias = Literal[
6059
"nanos",
6160
"nanosecond",
6261
]
63-
_S = TypeVar("_S", bound=timedelta)
6462

6563
def get_unit_for_round(freq, creso: int) -> int: ...
6664
def disallow_ambiguous_unit(unit: str | None) -> None: ...
@@ -95,11 +93,11 @@ class Timedelta(timedelta):
9593
_value: int # np.int64
9694
# error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]")
9795
def __new__( # type: ignore[misc]
98-
cls: type[_S],
96+
cls: type[Self],
9997
value=...,
10098
unit: str | None = ...,
10199
**kwargs: float | np.integer | np.floating,
102-
) -> _S | NaTType: ...
100+
) -> Self | NaTType: ...
103101
@classmethod
104102
def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ...
105103
@property

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,9 @@ class MinMaxReso:
200200
201201
See also: timedeltas.MinMaxReso
202202
"""
203-
def __init__(self, name):
203+
def __init__(self, name, docstring):
204204
self._name = name
205+
self.__doc__ = docstring
205206

206207
def __get__(self, obj, type=None):
207208
cls = Timestamp
@@ -216,11 +217,15 @@ class MinMaxReso:
216217

217218
if obj is None:
218219
# i.e. this is on the class, default to nanos
219-
return cls(val)
220+
result = cls(val)
220221
elif self._name == "resolution":
221-
return Timedelta._from_value_and_reso(val, obj._creso)
222+
result = Timedelta._from_value_and_reso(val, obj._creso)
222223
else:
223-
return Timestamp._from_value_and_reso(val, obj._creso, tz=None)
224+
result = Timestamp._from_value_and_reso(val, obj._creso, tz=None)
225+
226+
result.__doc__ = self.__doc__
227+
228+
return result
224229

225230
def __set__(self, obj, value):
226231
raise AttributeError(f"{self._name} is not settable.")
@@ -235,9 +240,74 @@ cdef class _Timestamp(ABCTimestamp):
235240
dayofweek = _Timestamp.day_of_week
236241
dayofyear = _Timestamp.day_of_year
237242

238-
min = MinMaxReso("min")
239-
max = MinMaxReso("max")
240-
resolution = MinMaxReso("resolution") # GH#21336, GH#21365
243+
_docstring_min = """
244+
Returns the minimum bound possible for Timestamp.
245+
246+
This property provides access to the smallest possible value that
247+
can be represented by a Timestamp object.
248+
249+
Returns
250+
-------
251+
Timestamp
252+
253+
See Also
254+
--------
255+
Timestamp.max: Returns the maximum bound possible for Timestamp.
256+
Timestamp.resolution: Returns the smallest possible difference between
257+
non-equal Timestamp objects.
258+
259+
Examples
260+
--------
261+
>>> pd.Timestamp.min
262+
Timestamp('1677-09-21 00:12:43.145224193')
263+
"""
264+
265+
_docstring_max = """
266+
Returns the maximum bound possible for Timestamp.
267+
268+
This property provides access to the largest possible value that
269+
can be represented by a Timestamp object.
270+
271+
Returns
272+
-------
273+
Timestamp
274+
275+
See Also
276+
--------
277+
Timestamp.min: Returns the minimum bound possible for Timestamp.
278+
Timestamp.resolution: Returns the smallest possible difference between
279+
non-equal Timestamp objects.
280+
281+
Examples
282+
--------
283+
>>> pd.Timestamp.max
284+
Timestamp('2262-04-11 23:47:16.854775807')
285+
"""
286+
287+
_docstring_reso = """
288+
Returns the smallest possible difference between non-equal Timestamp objects.
289+
290+
The resolution value is determined by the underlying representation of time
291+
units and is equivalent to Timedelta(nanoseconds=1).
292+
293+
Returns
294+
-------
295+
Timedelta
296+
297+
See Also
298+
--------
299+
Timestamp.max: Returns the maximum bound possible for Timestamp.
300+
Timestamp.min: Returns the minimum bound possible for Timestamp.
301+
302+
Examples
303+
--------
304+
>>> pd.Timestamp.resolution
305+
Timedelta('0 days 00:00:00.000000001')
306+
"""
307+
308+
min = MinMaxReso("min", _docstring_min)
309+
max = MinMaxReso("max", _docstring_max)
310+
resolution = MinMaxReso("resolution", _docstring_reso) # GH#21336, GH#21365
241311

242312
@property
243313
def value(self) -> int:

pandas/compat/numpy/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@
3636
r".*In the future `np\.long` will be defined as.*",
3737
FutureWarning,
3838
)
39-
np_long = np.long # type: ignore[attr-defined]
40-
np_ulong = np.ulong # type: ignore[attr-defined]
39+
np_long = np.long
40+
np_ulong = np.ulong
4141
except AttributeError:
4242
np_long = np.int_
4343
np_ulong = np.uint

0 commit comments

Comments
 (0)