Skip to content

Commit 5483590

Browse files
authored
DEPR: Remove silent dropping of nuisance columns in window ops (#50576)
* DEPR: Remove silent dropping of nuisance columns in window ops
* Align exception with series and fix test
* Fix message
* Fix asv
* fix asv again
* fix asv again
1 parent 222e37d commit 5483590

File tree

8 files changed

+39
-90
lines changed

8 files changed

+39
-90
lines changed

asv_bench/benchmarks/rolling.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ class Groupby:
292292
["sum", "median", "mean", "max", "min", "kurt", "sum"],
293293
[
294294
("rolling", {"window": 2}),
295-
("rolling", {"window": "30s", "on": "C"}),
295+
("rolling", {"window": "30s"}),
296296
("expanding", {}),
297297
],
298298
)
@@ -304,9 +304,10 @@ def setup(self, method, window_kwargs):
304304
{
305305
"A": [str(i) for i in range(N)] * 10,
306306
"B": list(range(N)) * 10,
307-
"C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10),
308307
}
309308
)
309+
if isinstance(kwargs.get("window", None), str):
310+
df.index = pd.date_range(start="1900-01-01", freq="1min", periods=N * 10)
310311
self.groupby_window = getattr(df.groupby("A"), window)(**kwargs)
311312

312313
def time_method(self, method, window_kwargs):

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,7 @@ Removal of prior version deprecations/changes
729729
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
730730
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
731731
- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
732+
- Enforced deprecation of silently dropping nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. This will now raise a :class:`.errors.DataError` (:issue:`42834`)
732733
- Changed behavior in setting values with ``df.loc[:, foo] = bar`` or ``df.iloc[:, foo] = bar``, these now always attempt to set values inplace before falling back to casting (:issue:`45333`)
733734
- Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
734735
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)

pandas/core/window/rolling.py

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
Sized,
1919
cast,
2020
)
21-
import warnings
2221

2322
import numpy as np
2423

@@ -37,7 +36,6 @@
3736
from pandas.compat._optional import import_optional_dependency
3837
from pandas.errors import DataError
3938
from pandas.util._decorators import doc
40-
from pandas.util._exceptions import find_stack_level
4139

4240
from pandas.core.dtypes.common import (
4341
ensure_float64,
@@ -473,24 +471,23 @@ def _apply_blockwise(
473471
obj = notna(obj).astype(int)
474472
obj._mgr = obj._mgr.consolidate()
475473

476-
def hfunc(values: ArrayLike) -> ArrayLike:
477-
values = self._prep_values(values)
478-
return homogeneous_func(values)
479-
480474
if self.axis == 1:
481475
obj = obj.T
482476

483477
taker = []
484478
res_values = []
485479
for i, arr in enumerate(obj._iter_column_arrays()):
486480
# GH#42736 operate column-wise instead of block-wise
481+
# As of 2.0, nuisance columns raise DataError instead of being silently dropped
487482
try:
488-
res = hfunc(arr)
489-
except (TypeError, NotImplementedError):
490-
pass
491-
else:
492-
res_values.append(res)
493-
taker.append(i)
483+
arr = self._prep_values(arr)
484+
except (TypeError, NotImplementedError) as err:
485+
raise DataError(
486+
f"Cannot aggregate non-numeric type: {arr.dtype}"
487+
) from err
488+
res = homogeneous_func(arr)
489+
res_values.append(res)
490+
taker.append(i)
494491

495492
index = self._slice_axis_for_step(
496493
obj.index, res_values[0] if len(res_values) > 0 else None
@@ -505,18 +502,6 @@ def hfunc(values: ArrayLike) -> ArrayLike:
505502
if self.axis == 1:
506503
df = df.T
507504

508-
if 0 != len(res_values) != len(obj.columns):
509-
# GH#42738 ignore_failures dropped nuisance columns
510-
dropped = obj.columns.difference(obj.columns.take(taker))
511-
warnings.warn(
512-
"Dropping of nuisance columns in rolling operations "
513-
"is deprecated; in a future version this will raise TypeError. "
514-
"Select only valid columns before calling the operation. "
515-
f"Dropped columns were {dropped}",
516-
FutureWarning,
517-
stacklevel=find_stack_level(),
518-
)
519-
520505
return self._resolve_output(df, obj)
521506

522507
def _apply_tablewise(

pandas/tests/window/test_api.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.errors import SpecificationError
4+
from pandas.errors import (
5+
DataError,
6+
SpecificationError,
7+
)
58

69
from pandas import (
710
DataFrame,
@@ -66,18 +69,12 @@ def tests_skip_nuisance(step):
6669
tm.assert_frame_equal(result, expected)
6770

6871

69-
def test_skip_sum_object_raises(step):
72+
def test_sum_object_str_raises(step):
7073
df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
7174
r = df.rolling(window=3, step=step)
72-
msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)"
73-
with tm.assert_produces_warning(FutureWarning, match=msg):
74-
# GH#42738
75-
result = r.sum()
76-
expected = DataFrame(
77-
{"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
78-
columns=list("AB"),
79-
)[::step]
80-
tm.assert_frame_equal(result, expected)
75+
with pytest.raises(DataError, match="Cannot aggregate non-numeric type: object"):
76+
# GH#42738, enforced in 2.0
77+
r.sum()
8178

8279

8380
def test_agg(step):

pandas/tests/window/test_dtypes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step):
165165
rolled = df.rolling(2, min_periods=min_periods, step=step)
166166

167167
if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
168-
msg = "No numeric types to aggregate"
168+
msg = "Cannot aggregate non-numeric type"
169169
with pytest.raises(DataError, match=msg):
170170
getattr(rolled, method)()
171171
else:

pandas/tests/window/test_ewm.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,9 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
9898
halflife = halflife_with_times
9999
data = np.arange(10.0)
100100
data[::2] = np.nan
101-
df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)})
102-
with tm.assert_produces_warning(FutureWarning, match="nuisance columns"):
103-
# GH#42738
104-
result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
105-
expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
101+
df = DataFrame({"A": data})
102+
result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
103+
expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
106104
tm.assert_frame_equal(result, expected)
107105

108106

pandas/tests/window/test_groupby.py

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,13 +1125,6 @@ def test_methods(self, method, expected_data):
11251125
)
11261126
tm.assert_frame_equal(result, expected)
11271127

1128-
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
1129-
# GH#42738
1130-
expected = df.groupby("A", group_keys=True).apply(
1131-
lambda x: getattr(x.ewm(com=1.0), method)()
1132-
)
1133-
tm.assert_frame_equal(result, expected)
1134-
11351128
@pytest.mark.parametrize(
11361129
"method, expected_data",
11371130
[["corr", [np.nan, 1.0, 1.0, 1]], ["cov", [np.nan, 0.5, 0.928571, 1.385714]]],
@@ -1160,13 +1153,9 @@ def test_pairwise_methods(self, method, expected_data):
11601153
def test_times(self, times_frame):
11611154
# GH 40951
11621155
halflife = "23 days"
1163-
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
1164-
# GH#42738
1165-
result = (
1166-
times_frame.groupby("A")
1167-
.ewm(halflife=halflife, times=times_frame["C"])
1168-
.mean()
1169-
)
1156+
# GH#42738
1157+
times = times_frame.pop("C")
1158+
result = times_frame.groupby("A").ewm(halflife=halflife, times=times).mean()
11701159
expected = DataFrame(
11711160
{
11721161
"B": [
@@ -1200,29 +1189,13 @@ def test_times(self, times_frame):
12001189
)
12011190
tm.assert_frame_equal(result, expected)
12021191

1203-
def test_times_vs_apply(self, times_frame):
1204-
# GH 40951
1205-
halflife = "23 days"
1206-
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
1207-
# GH#42738
1208-
result = (
1209-
times_frame.groupby("A")
1210-
.ewm(halflife=halflife, times=times_frame["C"])
1211-
.mean()
1212-
)
1213-
expected = times_frame.groupby("A", group_keys=True).apply(
1214-
lambda x: x.ewm(halflife=halflife, times=x["C"]).mean()
1215-
)
1216-
tm.assert_frame_equal(result, expected)
1217-
12181192
def test_times_array(self, times_frame):
12191193
# GH 40951
12201194
halflife = "23 days"
1195+
times = times_frame.pop("C")
12211196
gb = times_frame.groupby("A")
1222-
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
1223-
# GH#42738
1224-
result = gb.ewm(halflife=halflife, times=times_frame["C"]).mean()
1225-
expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean()
1197+
result = gb.ewm(halflife=halflife, times=times).mean()
1198+
expected = gb.ewm(halflife=halflife, times=times.values).mean()
12261199
tm.assert_frame_equal(result, expected)
12271200

12281201
def test_dont_mutate_obj_after_slicing(self):

pandas/tests/window/test_numba.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -253,35 +253,32 @@ def test_invalid_engine_kwargs(self, grouper, method):
253253
def test_cython_vs_numba(
254254
self, grouper, method, nogil, parallel, nopython, ignore_na, adjust
255255
):
256+
df = DataFrame({"B": range(4)})
256257
if grouper == "None":
257258
grouper = lambda x: x
258-
warn = FutureWarning
259259
else:
260+
df["A"] = ["a", "b", "a", "b"]
260261
grouper = lambda x: x.groupby("A")
261-
warn = None
262262
if method == "sum":
263263
adjust = True
264-
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
265264
ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
266265

267266
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
268-
with tm.assert_produces_warning(warn, match="nuisance"):
269-
# GH#42738
270-
result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs)
271-
expected = getattr(ewm, method)(engine="cython")
267+
result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs)
268+
expected = getattr(ewm, method)(engine="cython")
272269

273270
tm.assert_frame_equal(result, expected)
274271

275272
@pytest.mark.parametrize("grouper", ["None", "groupby"])
276273
def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na):
277274
# GH 40951
278275

276+
df = DataFrame({"B": [0, 0, 1, 1, 2, 2]})
279277
if grouper == "None":
280278
grouper = lambda x: x
281-
warn = FutureWarning
282279
else:
283280
grouper = lambda x: x.groupby("A")
284-
warn = None
281+
df["A"] = ["a", "b", "a", "b", "b", "a"]
285282

286283
halflife = "23 days"
287284
times = to_datetime(
@@ -294,17 +291,14 @@ def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_
294291
"2020-01-03",
295292
]
296293
)
297-
df = DataFrame({"A": ["a", "b", "a", "b", "b", "a"], "B": [0, 0, 1, 1, 2, 2]})
298294
ewm = grouper(df).ewm(
299295
halflife=halflife, adjust=True, ignore_na=ignore_na, times=times
300296
)
301297

302298
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
303299

304-
with tm.assert_produces_warning(warn, match="nuisance"):
305-
# GH#42738
306-
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
307-
expected = ewm.mean(engine="cython")
300+
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
301+
expected = ewm.mean(engine="cython")
308302

309303
tm.assert_frame_equal(result, expected)
310304

0 commit comments

Comments
 (0)