Skip to content

DEPR: Enforce ewm(times=str) deprecation #48842

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Oct 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ Removal of prior version deprecations/changes
- Remove :meth:`DataFrameGroupBy.pad` and :meth:`DataFrameGroupBy.backfill` (:issue:`45076`)
- Remove ``numpy`` argument from :func:`read_json` (:issue:`30636`)
- Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`)
- Enforced disallowing a string column label as the ``times`` argument in :meth:`DataFrame.ewm` (:issue:`43265`)
- Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`)
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12089,7 +12089,7 @@ def ewm(
adjust: bool_t = True,
ignore_na: bool_t = False,
axis: Axis = 0,
times: str | np.ndarray | DataFrame | Series | None = None,
times: np.ndarray | DataFrame | Series | None = None,
method: str = "single",
) -> ExponentialMovingWindow:
axis = self._get_axis_number(axis)
Expand Down
40 changes: 8 additions & 32 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@
import datetime
from functools import partial
from textwrap import dedent
from typing import (
TYPE_CHECKING,
cast,
)
from typing import TYPE_CHECKING
import warnings

import numpy as np
Expand Down Expand Up @@ -106,7 +103,7 @@ def get_center_of_mass(


def _calculate_deltas(
times: str | np.ndarray | NDFrame | None,
times: np.ndarray | NDFrame,
halflife: float | TimedeltaConvertibleTypes | None,
) -> np.ndarray:
"""
Expand All @@ -115,7 +112,7 @@ def _calculate_deltas(

Parameters
----------
times : str, np.ndarray, Series, default None
times : np.ndarray, Series
Times corresponding to the observations. Must be monotonically increasing
and ``datetime64[ns]`` dtype.
halflife : float, str, timedelta, optional
Expand All @@ -126,13 +123,7 @@ def _calculate_deltas(
np.ndarray
Diff of the times divided by the half-life
"""
# error: Item "str" of "Union[str, ndarray, NDFrameT, None]" has no
# attribute "view"
# error: Item "None" of "Union[str, ndarray, NDFrameT, None]" has no
# attribute "view"
_times = np.asarray(
times.view(np.int64), dtype=np.float64 # type: ignore[union-attr]
)
_times = np.asarray(times.view(np.int64), dtype=np.float64)
# TODO: generalize to non-nano?
_halflife = float(Timedelta(halflife)._as_unit("ns").value)
return np.diff(_times) / _halflife
Expand Down Expand Up @@ -221,7 +212,7 @@ class ExponentialMovingWindow(BaseWindow):

For `Series` this parameter is unused and defaults to 0.

times : str, np.ndarray, Series, default None
times : np.ndarray, Series, default None

.. versionadded:: 1.1.0

Expand All @@ -232,9 +223,6 @@ class ExponentialMovingWindow(BaseWindow):

If 1-D array like, a sequence with the same shape as the observations.

.. deprecated:: 1.4.0
If str, the name of the column in the DataFrame representing the times.

method : str {'single', 'table'}, default 'single'
.. versionadded:: 1.4.0

Expand Down Expand Up @@ -359,7 +347,7 @@ def __init__(
adjust: bool = True,
ignore_na: bool = False,
axis: Axis = 0,
times: str | np.ndarray | NDFrame | None = None,
times: np.ndarray | NDFrame | None = None,
method: str = "single",
*,
selection=None,
Expand All @@ -384,18 +372,6 @@ def __init__(
if self.times is not None:
if not self.adjust:
raise NotImplementedError("times is not supported with adjust=False.")
if isinstance(self.times, str):
warnings.warn(
(
"Specifying times as a string column label is deprecated "
"and will be removed in a future version. Pass the column "
"into times instead."
),
FutureWarning,
stacklevel=find_stack_level(),
)
# self.times cannot be str anymore
self.times = cast("Series", self._selected_obj[self.times])
if not is_datetime64_ns_dtype(self.times):
raise ValueError("times must be datetime64[ns] dtype.")
if len(self.times) != len(obj):
Expand Down Expand Up @@ -897,7 +873,7 @@ def __init__(self, obj, *args, _grouper=None, **kwargs) -> None:
# sort the times and recalculate the deltas according to the groups
groupby_order = np.concatenate(list(self._grouper.indices.values()))
self._deltas = _calculate_deltas(
self.times.take(groupby_order), # type: ignore[union-attr]
self.times.take(groupby_order),
self.halflife,
)

Expand Down Expand Up @@ -928,7 +904,7 @@ def __init__(
adjust: bool = True,
ignore_na: bool = False,
axis: Axis = 0,
times: str | np.ndarray | NDFrame | None = None,
times: np.ndarray | NDFrame | None = None,
engine: str = "numba",
engine_kwargs: dict[str, bool] | None = None,
*,
Expand Down
14 changes: 6 additions & 8 deletions pandas/tests/window/test_ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,15 +236,13 @@ def test_float_dtype_ewma(func, expected, float_numpy_dtype):
tm.assert_frame_equal(result, expected)


def test_times_string_col_deprecated():
def test_times_string_col_raises():
# GH 43265
data = np.arange(10.0)
data[::2] = np.nan
df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)})
with tm.assert_produces_warning(FutureWarning, match="Specifying times"):
result = df.ewm(halflife="1 day", min_periods=0, times="time_col").mean()
expected = df.ewm(halflife=1.0, min_periods=0).mean()
tm.assert_frame_equal(result, expected)
df = DataFrame(
{"A": np.arange(10.0), "time_col": date_range("2000", freq="D", periods=10)}
)
with pytest.raises(ValueError, match="times must be datetime64"):
df.ewm(halflife="1 day", min_periods=0, times="time_col")


def test_ewm_sum_adjust_false_notimplemented():
Expand Down
16 changes: 12 additions & 4 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,11 @@ def test_times(self, times_frame):
halflife = "23 days"
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
# GH#42738
result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
result = (
times_frame.groupby("A")
.ewm(halflife=halflife, times=times_frame["C"])
.mean()
)
expected = DataFrame(
{
"B": [
Expand Down Expand Up @@ -1201,9 +1205,13 @@ def test_times_vs_apply(self, times_frame):
halflife = "23 days"
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
# GH#42738
result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
result = (
times_frame.groupby("A")
.ewm(halflife=halflife, times=times_frame["C"])
.mean()
)
expected = times_frame.groupby("A", group_keys=True).apply(
lambda x: x.ewm(halflife=halflife, times="C").mean()
lambda x: x.ewm(halflife=halflife, times=x["C"]).mean()
)
tm.assert_frame_equal(result, expected)

Expand All @@ -1213,7 +1221,7 @@ def test_times_array(self, times_frame):
gb = times_frame.groupby("A")
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
# GH#42738
result = gb.ewm(halflife=halflife, times="C").mean()
result = gb.ewm(halflife=halflife, times=times_frame["C"]).mean()
expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean()
tm.assert_frame_equal(result, expected)

Expand Down