Skip to content

Commit b650064

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-predicates-nan-propagation
2 parents bf02000 + 3f8d3e4 commit b650064

35 files changed

+491
-265
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9292
-i "pandas.Series.dt.day_name PR01,PR02" \
9393
-i "pandas.Series.dt.floor PR01,PR02" \
9494
-i "pandas.Series.dt.freq GL08" \
95-
-i "pandas.Series.dt.microseconds SA01" \
9695
-i "pandas.Series.dt.month_name PR01,PR02" \
9796
-i "pandas.Series.dt.nanoseconds SA01" \
9897
-i "pandas.Series.dt.normalize PR01" \
@@ -109,20 +108,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
109108
-i "pandas.Series.sparse.from_coo PR07,SA01" \
110109
-i "pandas.Series.sparse.npoints SA01" \
111110
-i "pandas.Series.sparse.sp_values SA01" \
112-
-i "pandas.Timedelta.asm8 SA01" \
113-
-i "pandas.Timedelta.ceil SA01" \
114111
-i "pandas.Timedelta.components SA01" \
115-
-i "pandas.Timedelta.floor SA01" \
116112
-i "pandas.Timedelta.max PR02" \
117113
-i "pandas.Timedelta.min PR02" \
118114
-i "pandas.Timedelta.resolution PR02" \
119-
-i "pandas.Timedelta.round SA01" \
120-
-i "pandas.Timedelta.to_numpy PR01" \
121115
-i "pandas.Timedelta.to_timedelta64 SA01" \
122116
-i "pandas.Timedelta.total_seconds SA01" \
123117
-i "pandas.Timedelta.view SA01" \
124-
-i "pandas.TimedeltaIndex.components SA01" \
125-
-i "pandas.TimedeltaIndex.microseconds SA01" \
126118
-i "pandas.TimedeltaIndex.nanoseconds SA01" \
127119
-i "pandas.TimedeltaIndex.seconds SA01" \
128120
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
@@ -131,10 +123,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
131123
-i "pandas.Timestamp.nanosecond GL08" \
132124
-i "pandas.Timestamp.resolution PR02" \
133125
-i "pandas.Timestamp.tzinfo GL08" \
134-
-i "pandas.Timestamp.value GL08" \
135126
-i "pandas.Timestamp.year GL08" \
136127
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
137-
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
138128
-i "pandas.api.types.is_bool PR01,SA01" \
139129
-i "pandas.api.types.is_categorical_dtype SA01" \
140130
-i "pandas.api.types.is_complex PR01,SA01" \

doc/source/whatsnew/v2.3.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ Conversion
103103
Strings
104104
^^^^^^^
105105
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
106-
-
106+
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the Python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
107+
107108

108109
Interval
109110
^^^^^^^^

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Other enhancements
5353
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
56+
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
5657
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
5758
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
5859
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
@@ -503,6 +504,7 @@ Performance improvements
503504
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
504505
- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`)
505506
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
507+
- Performance improvement in :class:`MultiIndex`: setting :attr:`MultiIndex.names` no longer invalidates all cached operations (:issue:`59578`)
506508
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
507509
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
508510
- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)
@@ -526,6 +528,7 @@ Performance improvements
526528
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
527529
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
528530
- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
531+
- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
529532
- Performance improvement in :meth:`to_hdf` by avoiding unnecessary reopenings of the HDF5 file, speeding up data addition to files with a very large number of groups (:issue:`58248`)
530533
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
531534
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,9 +1421,16 @@ cdef class _Timedelta(timedelta):
14211421
"""
14221422
Convert the Timedelta to a NumPy timedelta64.
14231423

1424-
This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
1425-
copy parameters are available here only for compatibility. Their values
1426-
will not affect the return value.
1424+
This is an alias method for `Timedelta.to_timedelta64()`.
1425+
1426+
Parameters
1427+
----------
1428+
dtype : NoneType
1429+
It is available here only for compatibility. Its value will not
1430+
affect the return value.
1431+
copy : bool, default False
1432+
It is available here only for compatibility. Its value will not
1433+
affect the return value.
14271434

14281435
Returns
14291436
-------
@@ -1498,6 +1505,12 @@ cdef class _Timedelta(timedelta):
14981505
numpy timedelta64 array scalar view
14991506
Array scalar view of the timedelta in nanoseconds.
15001507

1508+
See Also
1509+
--------
1510+
Timedelta.total_seconds : Return the total seconds in the duration.
1511+
Timedelta.components : Return a namedtuple of the Timedelta's components.
1512+
Timedelta.to_timedelta64 : Convert the Timedelta to a numpy.timedelta64.
1513+
15011514
Examples
15021515
--------
15031516
>>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
@@ -2061,6 +2074,12 @@ class Timedelta(_Timedelta):
20612074
------
20622075
ValueError if the freq cannot be converted
20632076
2077+
See Also
2078+
--------
2079+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2080+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2081+
Timestamp.ceil : Similar method for Timestamp objects.
2082+
20642083
Examples
20652084
--------
20662085
>>> td = pd.Timedelta('1001ms')
@@ -2081,6 +2100,16 @@ class Timedelta(_Timedelta):
20812100
Frequency string indicating the flooring resolution.
20822101
It uses the same units as class constructor :class:`~pandas.Timedelta`.
20832102
2103+
Returns
2104+
-------
2105+
Timedelta
2106+
A new Timedelta object floored to the specified resolution.
2107+
2108+
See Also
2109+
--------
2110+
Timestamp.ceil : Round the Timestamp up to the nearest specified resolution.
2111+
Timestamp.round : Round the Timestamp to the nearest specified resolution.
2112+
20842113
Examples
20852114
--------
20862115
>>> td = pd.Timedelta('1001ms')
@@ -2101,6 +2130,16 @@ class Timedelta(_Timedelta):
21012130
Frequency string indicating the ceiling resolution.
21022131
It uses the same units as class constructor :class:`~pandas.Timedelta`.
21032132
2133+
Returns
2134+
-------
2135+
Timedelta
2136+
A new Timedelta object ceiled to the specified resolution.
2137+
2138+
See Also
2139+
--------
2140+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2141+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2142+
21042143
Examples
21052144
--------
21062145
>>> td = pd.Timedelta('1001ms')

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,27 @@ cdef class _Timestamp(ABCTimestamp):
240240

241241
@property
242242
def value(self) -> int:
243+
"""
244+
Return the value of the Timestamp.
245+
246+
Returns
247+
-------
248+
int
249+
The integer representation of the Timestamp object in nanoseconds
250+
since the Unix epoch (1970-01-01 00:00:00 UTC).
251+
252+
See Also
253+
--------
254+
Timestamp.second : Return the second of the Timestamp.
255+
Timestamp.minute : Return the minute of the Timestamp.
256+
257+
Examples
258+
--------
259+
>>> ts = pd.Timestamp("2024-08-31 16:16:30")
260+
>>> ts.value
261+
1725120990000000000
262+
"""
263+
243264
try:
244265
return convert_reso(self._value, self._creso, NPY_FR_ns, False)
245266
except OverflowError:
@@ -1020,8 +1041,8 @@ cdef class _Timestamp(ABCTimestamp):
10201041

10211042
See Also
10221043
--------
1023-
Timestamp.day : Return the day of the year.
1024-
Timestamp.year : Return the year of the week.
1044+
Timestamp.day : Return the day of the Timestamp.
1045+
Timestamp.year : Return the year of the Timestamp.
10251046

10261047
Examples
10271048
--------

pandas/_testing/asserters.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def assert_index_equal(
188188
check_order: bool = True,
189189
rtol: float = 1.0e-5,
190190
atol: float = 1.0e-8,
191-
obj: str = "Index",
191+
obj: str | None = None,
192192
) -> None:
193193
"""
194194
Check that left and right Index are equal.
@@ -217,7 +217,7 @@ def assert_index_equal(
217217
Relative tolerance. Only used when check_exact is False.
218218
atol : float, default 1e-8
219219
Absolute tolerance. Only used when check_exact is False.
220-
obj : str, default 'Index'
220+
obj : str, default 'Index' or 'MultiIndex'
221221
Specify object name being compared, internally used to show appropriate
222222
assertion message.
223223
@@ -235,6 +235,9 @@ def assert_index_equal(
235235
"""
236236
__tracebackhide__ = True
237237

238+
if obj is None:
239+
obj = "MultiIndex" if isinstance(left, MultiIndex) else "Index"
240+
238241
def _check_types(left, right, obj: str = "Index") -> None:
239242
if not exact:
240243
return
@@ -283,7 +286,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
283286
right = cast(MultiIndex, right)
284287

285288
for level in range(left.nlevels):
286-
lobj = f"MultiIndex level [{level}]"
289+
lobj = f"{obj} level [{level}]"
287290
try:
288291
# try comparison on levels/codes to avoid densifying MultiIndex
289292
assert_index_equal(
@@ -314,7 +317,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
314317
obj=lobj,
315318
)
316319
# get_level_values may change dtype
317-
_check_types(left.levels[level], right.levels[level], obj=obj)
320+
_check_types(left.levels[level], right.levels[level], obj=lobj)
318321

319322
# skip exact index checking when `check_categorical` is False
320323
elif check_exact and check_categorical:
@@ -527,7 +530,7 @@ def assert_interval_array_equal(
527530
kwargs["check_freq"] = False
528531

529532
assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
530-
assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs)
533+
assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)
531534

532535
assert_attr_equal("closed", left, right, obj=obj)
533536

pandas/core/array_algos/quantile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def quantile_with_mask(
9191
if is_empty:
9292
# create the array of na_values
9393
# 2d len(values) * len(qs)
94-
flat = np.array([fill_value] * len(qs))
94+
flat = np.full(len(qs), fill_value)
9595
result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
9696
else:
9797
result = _nanquantile(

0 commit comments

Comments
 (0)