Skip to content

Commit 5abe369

Browse files
committed
Merge remote-tracking branch 'upstream/main' into nonattype
2 parents 5a7b312 + 4a65264 commit 5abe369

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+581
-331
lines changed

Dockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM quay.io/condaforge/miniforge3:4.11.0-0
1+
FROM quay.io/condaforge/miniforge3
22

33
# if you forked pandas, you can pass in your own GitHub username to use your fork
44
# i.e. gh_username=myname
@@ -12,6 +12,11 @@ ENV DEBIAN_FRONTEND=noninteractive
1212
RUN apt-get update \
1313
&& apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
1414
#
15+
# Install tzdata and configure timezone (fix for tests which try to read from "/etc/localtime")
16+
&& apt-get -y install tzdata \
17+
&& ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
18+
&& dpkg-reconfigure -f noninteractive tzdata \
19+
#
1520
# Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
1621
&& apt-get -y install git iproute2 procps iproute2 lsb-release \
1722
#

asv_bench/benchmarks/indexing.py

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -204,11 +204,11 @@ class MultiIndexing:
204204
param_names = ["unique_levels"]
205205

206206
def setup(self, unique_levels):
207-
self.ndim = 2
207+
self.nlevels = 2
208208
if unique_levels:
209-
mi = MultiIndex.from_arrays([range(1000000)] * self.ndim)
209+
mi = MultiIndex.from_arrays([range(1000000)] * self.nlevels)
210210
else:
211-
mi = MultiIndex.from_product([range(1000)] * self.ndim)
211+
mi = MultiIndex.from_product([range(1000)] * self.nlevels)
212212
self.df = DataFrame(np.random.randn(len(mi)), index=mi)
213213

214214
self.tgt_slice = slice(200, 800)
@@ -232,27 +232,27 @@ def time_loc_partial_key_list(self, unique_levels):
232232
def time_loc_partial_key_scalar(self, unique_levels):
233233
self.df.loc[self.tgt_scalar, :]
234234

235-
def time_loc_partial_bool_indexer(self, unique_levels):
235+
def time_loc_partial_key_bool_indexer(self, unique_levels):
236236
self.df.loc[self.tgt_bool_indexer, :]
237237

238238
def time_loc_all_slices(self, unique_levels):
239-
target = tuple([self.tgt_slice] * self.ndim)
239+
target = tuple([self.tgt_slice] * self.nlevels)
240240
self.df.loc[target, :]
241241

242242
def time_loc_all_null_slices(self, unique_levels):
243-
target = tuple([self.tgt_null_slice] * self.ndim)
243+
target = tuple([self.tgt_null_slice] * self.nlevels)
244244
self.df.loc[target, :]
245245

246246
def time_loc_all_lists(self, unique_levels):
247-
target = tuple([self.tgt_list] * self.ndim)
247+
target = tuple([self.tgt_list] * self.nlevels)
248248
self.df.loc[target, :]
249249

250250
def time_loc_all_scalars(self, unique_levels):
251-
target = tuple([self.tgt_scalar] * self.ndim)
251+
target = tuple([self.tgt_scalar] * self.nlevels)
252252
self.df.loc[target, :]
253253

254254
def time_loc_all_bool_indexers(self, unique_levels):
255-
target = tuple([self.tgt_bool_indexer] * self.ndim)
255+
target = tuple([self.tgt_bool_indexer] * self.nlevels)
256256
self.df.loc[target, :]
257257

258258
def time_loc_slice_plus_null_slice(self, unique_levels):
@@ -263,6 +263,18 @@ def time_loc_null_slice_plus_slice(self, unique_levels):
263263
target = (self.tgt_null_slice, self.tgt_slice)
264264
self.df.loc[target, :]
265265

266+
def time_xs_level_0(self, unique_levels):
267+
target = self.tgt_scalar
268+
self.df.xs(target, level=0)
269+
270+
def time_xs_level_1(self, unique_levels):
271+
target = self.tgt_scalar
272+
self.df.xs(target, level=1)
273+
274+
def time_xs_full_key(self, unique_levels):
275+
target = tuple([self.tgt_scalar] * self.nlevels)
276+
self.df.xs(target)
277+
266278

267279
class IntervalIndexing:
268280
def setup_cache(self):
@@ -290,20 +302,26 @@ def setup(self):
290302
self.dti = dti
291303
self.dti2 = dti2
292304

293-
index = np.random.choice(dti, 10000, replace=True)
294-
df = DataFrame(index=index, data={"a": 1})
295-
df_sort = df.sort_index()
296-
self.df = df
297-
self.df_sort = df_sort
298-
299305
def time_get_indexer_mismatched_tz(self):
300306
# reached via e.g.
301307
# ser = Series(range(len(dti)), index=dti)
302308
# ser[dti2]
303309
self.dti.get_indexer(self.dti2)
304310

311+
312+
class SortedAndUnsortedDatetimeIndexLoc:
313+
def setup(self):
314+
dti = date_range("2016-01-01", periods=10000, tz="US/Pacific")
315+
index = np.array(dti)
316+
317+
unsorted_index = index.copy()
318+
unsorted_index[10] = unsorted_index[20]
319+
320+
self.df_unsorted = DataFrame(index=unsorted_index, data={"a": 1})
321+
self.df_sort = DataFrame(index=index, data={"a": 1})
322+
305323
def time_loc_unsorted(self):
306-
self.df.loc["2016-6-11"]
324+
self.df_unsorted.loc["2016-6-11"]
307325

308326
def time_loc_sorted(self):
309327
self.df_sort.loc["2016-6-11"]

asv_bench/benchmarks/reindex.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,22 @@ def setup(self):
2828
index = MultiIndex.from_arrays([level1, level2])
2929
self.s = Series(np.random.randn(N * K), index=index)
3030
self.s_subset = self.s[::2]
31+
self.s_subset_no_cache = self.s[::2].copy()
3132

3233
def time_reindex_dates(self):
3334
self.df.reindex(self.rng_subset)
3435

3536
def time_reindex_columns(self):
3637
self.df2.reindex(columns=self.df.columns[1:5])
3738

38-
def time_reindex_multiindex(self):
39+
def time_reindex_multiindex_with_cache(self):
40+
# MultiIndex._values gets cached
3941
self.s.reindex(self.s_subset.index)
4042

43+
def time_reindex_multiindex_no_cache(self):
44+
# Copy to avoid MultiIndex._values getting cached
45+
self.s.reindex(self.s_subset_no_cache.index.copy())
46+
4147

4248
class ReindexMethod:
4349

32.4 KB
Loading

doc/source/whatsnew/v1.4.2.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`)
18+
- Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`)
19+
- Provided an alternative solution for passing custom Excel formats in :meth:`.Styler.to_excel`, which was a regression based on stricter CSS validation. Examples available in the documentation for :meth:`.Styler.format` (:issue:`46152`)
1820
-
1921

2022
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.5.0.rst

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ Other Deprecations
289289

290290
Performance improvements
291291
~~~~~~~~~~~~~~~~~~~~~~~~
292+
- Performance improvement in :meth:`DataFrame.corrwith` for column-wise (axis=0) Pearson and Spearman correlation when ``other`` is a :class:`Series` (:issue:`46174`)
292293
- Performance improvement in :meth:`.GroupBy.transform` for some user-defined DataFrame -> Series functions (:issue:`45387`)
293294
- Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`)
294295
- Performance improvement in :meth:`.GroupBy.diff` (:issue:`16706`)
@@ -297,9 +298,9 @@ Performance improvements
297298
- Performance improvement in :meth:`MultiIndex.get_locs` (:issue:`45681`, :issue:`46040`)
298299
- Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`)
299300
- Performance improvement in :meth:`DataFrame.join` when left and/or right are empty (:issue:`46015`)
301+
- Performance improvement in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when target is a :class:`MultiIndex` (:issue:`46235`)
300302
- Performance improvement in :func:`factorize` (:issue:`46109`)
301303
- Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`)
302-
-
303304

304305
.. ---------------------------------------------------------------------------
305306
.. _whatsnew_150.bug_fixes:
@@ -392,6 +393,7 @@ Missing
392393

393394
MultiIndex
394395
^^^^^^^^^^
396+
- Bug in :meth:`Series.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`)
395397
- Bug in :meth:`MultiIndex.equals` not being commutative when only one side has extension array dtype (:issue:`46026`)
396398
-
397399

@@ -406,6 +408,7 @@ I/O
406408
- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
407409
- Bug in :meth:`DataFrame.to_excel` and :class:`ExcelWriter` raising when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
408410
- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
411+
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
409412

410413
Period
411414
^^^^^^
@@ -418,7 +421,7 @@ Plotting
418421
- Bug in :meth:`DataFrame.plot.box` that prevented labeling the x-axis (:issue:`45463`)
419422
- Bug in :meth:`DataFrame.boxplot` that prevented passing in ``xlabel`` and ``ylabel`` (:issue:`45463`)
420423
- Bug in :meth:`DataFrame.boxplot` that prevented specifying ``vert=False`` (:issue:`36918`)
421-
-
424+
- Bug in :meth:`DataFrame.plot.scatter` that prevented specifying ``norm`` (:issue:`45809`)
422425

423426
Groupby/resample/rolling
424427
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -428,6 +431,10 @@ Groupby/resample/rolling
428431
- Bug in :meth:`.ExponentialMovingWindow.mean` with ``axis=1`` and ``engine='numba'`` when the :class:`DataFrame` has more columns than rows (:issue:`46086`)
429432
- Bug where using ``engine="numba"`` would return the same jitted function when modifying ``engine_kwargs`` (:issue:`46086`)
430433
- Bug in :meth:`.DataFrameGroupBy.transform` failing when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`)
434+
- Bug in :meth:`.DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`)
435+
- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`)
436+
- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
437+
-
431438

432439
Reshaping
433440
^^^^^^^^^

0 commit comments

Comments
 (0)