Skip to content

Commit f29760c

Browse files
author
MomIsBestFriend
committed
Merge remote-tracking branch 'upstream/master' into TODO-runtimewarning-_libs-lib
2 parents 1314e99 + b6cb1a4 commit f29760c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+993
-678
lines changed

asv_bench/benchmarks/series_methods.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -223,27 +223,27 @@ def time_series_datetimeindex_repr(self):
223223

224224
class All:
225225

226-
params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
227-
param_names = ["N", "case"]
226+
params = [[10 ** 3, 10 ** 6], ["fast", "slow"], ["bool", "boolean"]]
227+
param_names = ["N", "case", "dtype"]
228228

229-
def setup(self, N, case):
229+
def setup(self, N, case, dtype):
230230
val = case != "fast"
231-
self.s = Series([val] * N)
231+
self.s = Series([val] * N, dtype=dtype)
232232

233-
def time_all(self, N, case):
233+
def time_all(self, N, case, dtype):
234234
self.s.all()
235235

236236

237237
class Any:
238238

239-
params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
240-
param_names = ["N", "case"]
239+
params = [[10 ** 3, 10 ** 6], ["fast", "slow"], ["bool", "boolean"]]
240+
param_names = ["N", "case", "dtype"]
241241

242-
def setup(self, N, case):
242+
def setup(self, N, case, dtype):
243243
val = case == "fast"
244-
self.s = Series([val] * N)
244+
self.s = Series([val] * N, dtype=dtype)
245245

246-
def time_any(self, N, case):
246+
def time_any(self, N, case, dtype):
247247
self.s.any()
248248

249249

@@ -265,11 +265,14 @@ class NanOps:
265265
"prod",
266266
],
267267
[10 ** 3, 10 ** 6],
268-
["int8", "int32", "int64", "float64"],
268+
["int8", "int32", "int64", "float64", "Int64", "boolean"],
269269
]
270270
param_names = ["func", "N", "dtype"]
271271

272272
def setup(self, func, N, dtype):
273+
if func == "argmax" and dtype in {"Int64", "boolean"}:
274+
# Skip argmax for nullable integer and boolean dtypes since this doesn't work yet (GH-24382)
275+
raise NotImplementedError
273276
self.s = Series([1] * N, dtype=dtype)
274277
self.func = getattr(self.s, func)
275278

asv_bench/benchmarks/stat_ops.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,17 @@
77

88
class FrameOps:
99

10-
params = [ops, ["float", "int"], [0, 1]]
10+
params = [ops, ["float", "int", "Int64"], [0, 1]]
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14-
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
14+
if op == "mad" and dtype == "Int64" and axis == 1:
15+
# GH-33036
16+
raise NotImplementedError
17+
values = np.random.randn(100000, 4)
18+
if dtype == "Int64":
19+
values = values.astype(int)
20+
df = pd.DataFrame(values).astype(dtype)
1521
self.df_func = getattr(df, op)
1622

1723
def time_op(self, op, dtype, axis):

doc/source/whatsnew/v1.1.0.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ Performance improvements
255255
sparse values from ``scipy.sparse`` matrices using the
256256
:meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
257257
:issue:`32825`, :issue:`32826`, :issue:`32856`, :issue:`32858`).
258+
- Performance improvement in :meth:`Series.sum` for nullable (integer and boolean) dtypes (:issue:`30982`).
259+
258260

259261
.. ---------------------------------------------------------------------------
260262
@@ -314,7 +316,7 @@ Conversion
314316
Strings
315317
^^^^^^^
316318

317-
-
319+
- Bug in the :meth:`~Series.astype` method when converting "string" dtype data to nullable integer dtype (:issue:`32450`).
318320
-
319321

320322

@@ -336,7 +338,9 @@ Indexing
336338
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
337339
- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)
338340
- Bug in :meth:`Series.__getitem__` indexing with non-standard scalars, e.g. ``np.dtype`` (:issue:`32684`)
341+
- Fix to preserve the ability to index with the "nearest" method with xarray's CFTimeIndex, an :class:`Index` subclass (`pydata/xarray#3751 <https://github.com/pydata/xarray/issues/3751>`_, :issue:`32905`).
339342
- Bug in :class:`Index` constructor where an unhelpful error message was raised for ``numpy`` scalars (:issue:`33017`)
343+
- Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`)
340344

341345
Missing
342346
^^^^^^^
@@ -398,6 +402,8 @@ Groupby/resample/rolling
398402

399403
- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
400404
- Bug in :meth:`DataFrameGroupBy.transform` produces incorrect result with transformation functions (:issue:`30918`)
405+
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` produces inconsistent type when aggregating Boolean series (:issue:`32894`)
406+
401407

402408
Reshaping
403409
^^^^^^^^^
@@ -410,13 +416,15 @@ Reshaping
410416
- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
411417
- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`)
412418
- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
419+
- Bug where :meth:`Index.astype` would lose the name attribute when converting from ``Float64Index`` to ``Int64Index``, or when casting to an ``ExtensionArray`` dtype (:issue:`32013`)
413420
- :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing a DataFrame (:issue:`31413`)
414421
- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`)
415422
- Bug in inplace operations on a Series that was adding a column to the DataFrame from which it was originally dropped (using ``inplace=True``) (:issue:`30484`)
416423
- Bug in :meth:`DataFrame.apply` where callback was called with :class:`Series` parameter even though ``raw=True`` requested. (:issue:`32423`)
417424
- Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`)
418425
- Bug in :func:`concat` where passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`)
419426
- :meth:`DataFrame.agg` now provides a more descriptive ``SpecificationError`` message when attempting to aggregate a non-existent column (:issue:`32755`)
427+
- Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`)
420428

421429

422430
Sparse

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ dependencies:
101101
- s3fs # pandas.read_csv... when using 's3://...' path
102102
- sqlalchemy # pandas.read_sql, DataFrame.to_sql
103103
- xarray # DataFrame.to_xarray
104+
- cftime # Needed for downstream xarray.CFTimeIndex test
104105
- pyreadstat # pandas.read_spss
105106
- tabulate>=0.8.3 # DataFrame.to_markdown
106107
- pip:

0 commit comments

Comments
 (0)