From 297affd86ab307ab52a149297dfc6e4e21b26731 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 28 Sep 2022 11:46:27 -0700 Subject: [PATCH 1/7] DEPR: Enforce ewm(times=str) deprecation --- pandas/core/window/ewm.py | 22 ++-------------------- pandas/tests/window/test_ewm.py | 11 ----------- 2 files changed, 2 insertions(+), 31 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 6fae119bffdf1..a34e1f648634d 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -4,10 +4,7 @@ from functools import partial import inspect from textwrap import dedent -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import TYPE_CHECKING import warnings import numpy as np @@ -221,7 +218,7 @@ class ExponentialMovingWindow(BaseWindow): For `Series` this parameter is unused and defaults to 0. - times : str, np.ndarray, Series, default None + times : np.ndarray, Series, default None .. versionadded:: 1.1.0 @@ -232,9 +229,6 @@ class ExponentialMovingWindow(BaseWindow): If 1-D array like, a sequence with the same shape as the observations. - .. deprecated:: 1.4.0 - If str, the name of the column in the DataFrame representing the times. - method : str {'single', 'table'}, default 'single' .. versionadded:: 1.4.0 @@ -384,18 +378,6 @@ def __init__( if self.times is not None: if not self.adjust: raise NotImplementedError("times is not supported with adjust=False.") - if isinstance(self.times, str): - warnings.warn( - ( - "Specifying times as a string column label is deprecated " - "and will be removed in a future version. Pass the column " - "into times instead." 
- ), - FutureWarning, - stacklevel=find_stack_level(inspect.currentframe()), - ) - # self.times cannot be str anymore - self.times = cast("Series", self._selected_obj[self.times]) if not is_datetime64_ns_dtype(self.times): raise ValueError("times must be datetime64[ns] dtype.") if len(self.times) != len(obj): diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index b524eb5978fa0..93986771186c2 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -236,17 +236,6 @@ def test_float_dtype_ewma(func, expected, float_numpy_dtype): tm.assert_frame_equal(result, expected) -def test_times_string_col_deprecated(): - # GH 43265 - data = np.arange(10.0) - data[::2] = np.nan - df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)}) - with tm.assert_produces_warning(FutureWarning, match="Specifying times"): - result = df.ewm(halflife="1 day", min_periods=0, times="time_col").mean() - expected = df.ewm(halflife=1.0, min_periods=0).mean() - tm.assert_frame_equal(result, expected) - - def test_ewm_sum_adjust_false_notimplemented(): data = Series(range(1)).ewm(com=1, adjust=False) with pytest.raises(NotImplementedError, match="sum is not"): From 88aaefdae18a151ae764cba4466d07f2d32723cb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 28 Sep 2022 11:48:26 -0700 Subject: [PATCH 2/7] Fix typing --- pandas/core/window/ewm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index a34e1f648634d..e1b3dbfada31c 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -910,7 +910,7 @@ def __init__( adjust: bool = True, ignore_na: bool = False, axis: Axis = 0, - times: str | np.ndarray | NDFrame | None = None, + times: np.ndarray | NDFrame | None = None, engine: str = "numba", engine_kwargs: dict[str, bool] | None = None, *, From 
ec846c49a78531da4089b85556a94b171598d3d0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 29 Sep 2022 14:01:48 -0700 Subject: [PATCH 3/7] Fix test --- pandas/tests/window/test_groupby.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 5f4805eaa01d2..161baca1dfc20 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1164,7 +1164,11 @@ def test_times(self, times_frame): halflife = "23 days" with tm.assert_produces_warning(FutureWarning, match="nuisance"): # GH#42738 - result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() + result = ( + times_frame.groupby("A") + .ewm(halflife=halflife, times=times_frame["C"]) + .mean() + ) expected = DataFrame( { "B": [ @@ -1203,9 +1207,13 @@ def test_times_vs_apply(self, times_frame): halflife = "23 days" with tm.assert_produces_warning(FutureWarning, match="nuisance"): # GH#42738 - result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() + result = ( + times_frame.groupby("A") + .ewm(halflife=halflife, times=times_frame["C"]) + .mean() + ) expected = times_frame.groupby("A", group_keys=True).apply( - lambda x: x.ewm(halflife=halflife, times="C").mean() + lambda x: x.ewm(halflife=halflife, times=x["C"]).mean() ) tm.assert_frame_equal(result, expected) @@ -1215,7 +1223,7 @@ def test_times_array(self, times_frame): gb = times_frame.groupby("A") with tm.assert_produces_warning(FutureWarning, match="nuisance"): # GH#42738 - result = gb.ewm(halflife=halflife, times="C").mean() + result = gb.ewm(halflife=halflife, times=times_frame["C"]).mean() expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean() tm.assert_frame_equal(result, expected) From b30768ef73a985f45ac17945a41889820702bb2c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke 
<10647082+mroeschke@users.noreply.github.com> Date: Thu, 13 Oct 2022 11:41:21 -0700 Subject: [PATCH 4/7] convert test to raises --- pandas/tests/window/test_ewm.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 93986771186c2..3f6574a4b54ea 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -236,6 +236,15 @@ def test_float_dtype_ewma(func, expected, float_numpy_dtype): tm.assert_frame_equal(result, expected) +def test_times_string_col_raises(): + # GH 43265 + df = DataFrame( + {"A": np.arange(10.0), "time_col": date_range("2000", freq="D", periods=10)} + ) + with pytest.raises(ValueError, match="times must be datetime64"): + df.ewm(halflife="1 day", min_periods=0, times="time_col") + + def test_ewm_sum_adjust_false_notimplemented(): data = Series(range(1)).ewm(com=1, adjust=False) with pytest.raises(NotImplementedError, match="sum is not"): From 13449ffce5817e088d6501b38cbd00ff5371096f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 19 Oct 2022 16:56:49 -0700 Subject: [PATCH 5/7] Add whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e57ba92267855..05626cc9f5394 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -148,6 +148,7 @@ Removal of prior version deprecations/changes - Removed :func:`is_extension_type` in favor of :func:`is_extension_array_dtype` (:issue:`29457`) - Remove :meth:`DataFrameGroupBy.pad` and :meth:`DataFrameGroupBy.backfill` (:issue:`45076`) - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`) +- Enforced disallowing passing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`) .. 
--------------------------------------------------------------------------- .. _whatsnew_200.performance: From bbb2f507258a0089c5a4c94aebc41f26a7651d68 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 20 Oct 2022 11:58:53 -0700 Subject: [PATCH 6/7] Fix typing --- pandas/core/window/ewm.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 33f2973167d28..eaf165f168a58 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -103,7 +103,7 @@ def get_center_of_mass( def _calculate_deltas( - times: str | np.ndarray | NDFrame | None, + times: np.ndarray | NDFrame, halflife: float | TimedeltaConvertibleTypes | None, ) -> np.ndarray: """ @@ -112,7 +112,7 @@ def _calculate_deltas( Parameters ---------- - times : str, np.ndarray, Series, default None + times : np.ndarray, Series Times corresponding to the observations. Must be monotonically increasing and ``datetime64[ns]`` dtype. halflife : float, str, timedelta, optional @@ -123,13 +123,7 @@ def _calculate_deltas( np.ndarray Diff of the times divided by the half-life """ - # error: Item "str" of "Union[str, ndarray, NDFrameT, None]" has no - # attribute "view" - # error: Item "None" of "Union[str, ndarray, NDFrameT, None]" has no - # attribute "view" - _times = np.asarray( - times.view(np.int64), dtype=np.float64 # type: ignore[union-attr] - ) + _times = np.asarray(times.view(np.int64), dtype=np.float64) # TODO: generalize to non-nano? 
_halflife = float(Timedelta(halflife)._as_unit("ns").value) return np.diff(_times) / _halflife @@ -353,7 +347,7 @@ def __init__( adjust: bool = True, ignore_na: bool = False, axis: Axis = 0, - times: str | np.ndarray | NDFrame | None = None, + times: np.ndarray | NDFrame | None = None, method: str = "single", *, selection=None, @@ -879,7 +873,7 @@ def __init__(self, obj, *args, _grouper=None, **kwargs) -> None: # sort the times and recalculate the deltas according to the groups groupby_order = np.concatenate(list(self._grouper.indices.values())) self._deltas = _calculate_deltas( - self.times.take(groupby_order), # type: ignore[union-attr] + self.times.take(groupby_order), self.halflife, ) From e8877fef402d8e8b51b0c47bc76723617f9598d1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 20 Oct 2022 14:19:49 -0700 Subject: [PATCH 7/7] fix typing --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a30b21d04ff59..82d2082e8292d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -12089,7 +12089,7 @@ def ewm( adjust: bool_t = True, ignore_na: bool_t = False, axis: Axis = 0, - times: str | np.ndarray | DataFrame | Series | None = None, + times: np.ndarray | DataFrame | Series | None = None, method: str = "single", ) -> ExponentialMovingWindow: axis = self._get_axis_number(axis)