diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4ac737bb6b29a..91ca4c0210d64 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -165,6 +165,7 @@ Removal of prior version deprecations/changes - Remove :meth:`DataFrameGroupBy.pad` and :meth:`DataFrameGroupBy.backfill` (:issue:`45076`) - Remove ``numpy`` argument from :func:`read_json` (:issue:`30636`) - Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`) +- Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`) - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`) - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a30b21d04ff59..82d2082e8292d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -12089,7 +12089,7 @@ def ewm( adjust: bool_t = True, ignore_na: bool_t = False, axis: Axis = 0, - times: str | np.ndarray | DataFrame | Series | None = None, + times: np.ndarray | DataFrame | Series | None = None, method: str = "single", ) -> ExponentialMovingWindow: axis = self._get_axis_number(axis) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 95a6689cec941..eaf165f168a58 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -3,10 +3,7 @@ import datetime from functools import partial from textwrap import dedent -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import TYPE_CHECKING import warnings import numpy as np @@ -106,7 +103,7 @@ def get_center_of_mass( def _calculate_deltas( - times: str | np.ndarray | NDFrame | None, + times: np.ndarray | NDFrame, halflife: float | TimedeltaConvertibleTypes | None, ) -> np.ndarray: """ @@ -115,7 +112,7 @@ def _calculate_deltas( Parameters ---------- - times : str, np.ndarray, Series, default None + times : np.ndarray, Series Times corresponding to the observations. Must be monotonically increasing and ``datetime64[ns]`` dtype. halflife : float, str, timedelta, optional @@ -126,13 +123,7 @@ def _calculate_deltas( np.ndarray Diff of the times divided by the half-life """ - # error: Item "str" of "Union[str, ndarray, NDFrameT, None]" has no - # attribute "view" - # error: Item "None" of "Union[str, ndarray, NDFrameT, None]" has no - # attribute "view" - _times = np.asarray( - times.view(np.int64), dtype=np.float64 # type: ignore[union-attr] - ) + _times = np.asarray(times.view(np.int64), dtype=np.float64) # TODO: generalize to non-nano? _halflife = float(Timedelta(halflife)._as_unit("ns").value) return np.diff(_times) / _halflife @@ -221,7 +212,7 @@ class ExponentialMovingWindow(BaseWindow): For `Series` this parameter is unused and defaults to 0. - times : str, np.ndarray, Series, default None + times : np.ndarray, Series, default None .. versionadded:: 1.1.0 @@ -232,9 +223,6 @@ class ExponentialMovingWindow(BaseWindow): If 1-D array like, a sequence with the same shape as the observations. - .. deprecated:: 1.4.0 - If str, the name of the column in the DataFrame representing the times. - method : str {'single', 'table'}, default 'single' .. versionadded:: 1.4.0 @@ -359,7 +347,7 @@ def __init__( adjust: bool = True, ignore_na: bool = False, axis: Axis = 0, - times: str | np.ndarray | NDFrame | None = None, + times: np.ndarray | NDFrame | None = None, method: str = "single", *, selection=None, @@ -384,18 +372,6 @@ def __init__( if self.times is not None: if not self.adjust: raise NotImplementedError("times is not supported with adjust=False.") - if isinstance(self.times, str): - warnings.warn( - ( - "Specifying times as a string column label is deprecated " - "and will be removed in a future version. Pass the column " - "into times instead." - ), - FutureWarning, - stacklevel=find_stack_level(), - ) - # self.times cannot be str anymore - self.times = cast("Series", self._selected_obj[self.times]) if not is_datetime64_ns_dtype(self.times): raise ValueError("times must be datetime64[ns] dtype.") if len(self.times) != len(obj): @@ -897,7 +873,7 @@ def __init__(self, obj, *args, _grouper=None, **kwargs) -> None: # sort the times and recalculate the deltas according to the groups groupby_order = np.concatenate(list(self._grouper.indices.values())) self._deltas = _calculate_deltas( - self.times.take(groupby_order), # type: ignore[union-attr] + self.times.take(groupby_order), self.halflife, ) @@ -928,7 +904,7 @@ def __init__( adjust: bool = True, ignore_na: bool = False, axis: Axis = 0, - times: str | np.ndarray | NDFrame | None = None, + times: np.ndarray | NDFrame | None = None, engine: str = "numba", engine_kwargs: dict[str, bool] | None = None, *, diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index b524eb5978fa0..3f6574a4b54ea 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -236,15 +236,13 @@ def test_float_dtype_ewma(func, expected, float_numpy_dtype): tm.assert_frame_equal(result, expected) -def test_times_string_col_deprecated(): +def test_times_string_col_raises(): # GH 43265 - data = np.arange(10.0) - data[::2] = np.nan - df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)}) - with tm.assert_produces_warning(FutureWarning, match="Specifying times"): - result = df.ewm(halflife="1 day", min_periods=0, times="time_col").mean() - expected = df.ewm(halflife=1.0, min_periods=0).mean() - tm.assert_frame_equal(result, expected) + df = DataFrame( + {"A": np.arange(10.0), "time_col": date_range("2000", freq="D", periods=10)} + ) + with pytest.raises(ValueError, match="times must be datetime64"): + df.ewm(halflife="1 day", min_periods=0, times="time_col") def test_ewm_sum_adjust_false_notimplemented(): diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 7064d465bb555..688a93223b3f4 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1162,7 +1162,11 @@ def test_times(self, times_frame): halflife = "23 days" with tm.assert_produces_warning(FutureWarning, match="nuisance"): # GH#42738 - result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() + result = ( + times_frame.groupby("A") + .ewm(halflife=halflife, times=times_frame["C"]) + .mean() + ) expected = DataFrame( { "B": [ @@ -1201,9 +1205,13 @@ def test_times_vs_apply(self, times_frame): halflife = "23 days" with tm.assert_produces_warning(FutureWarning, match="nuisance"): # GH#42738 - result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() + result = ( + times_frame.groupby("A") + .ewm(halflife=halflife, times=times_frame["C"]) + .mean() + ) expected = times_frame.groupby("A", group_keys=True).apply( - lambda x: x.ewm(halflife=halflife, times="C").mean() + lambda x: x.ewm(halflife=halflife, times=x["C"]).mean() ) tm.assert_frame_equal(result, expected) @@ -1213,7 +1221,7 @@ def test_times_array(self, times_frame): gb = times_frame.groupby("A") with tm.assert_produces_warning(FutureWarning, match="nuisance"): # GH#42738 - result = gb.ewm(halflife=halflife, times="C").mean() + result = gb.ewm(halflife=halflife, times=times_frame["C"]).mean() expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean() tm.assert_frame_equal(result, expected)