-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Use ea interface to calculate accumulator functions for datetimelike #50297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
79f9cd4
6fd82e3
4aa5019
d9653c0
4ffa497
8305cf3
9487b86
eb55b3c
47c3730
ed46798
d1fa417
f72cc65
177284a
589218d
93215ae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
""" | ||
datetimelke_accumulations.py is for accumulations of datetimelike extension arrays | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
from typing import Callable | ||
|
||
import numpy as np | ||
|
||
from pandas._libs import iNaT | ||
|
||
from pandas.core.dtypes.missing import isna | ||
|
||
|
||
def _cum_func( | ||
func: Callable, | ||
values: np.ndarray, | ||
*, | ||
skipna: bool = True, | ||
): | ||
""" | ||
Accumulations for 1D datetimelike arrays. | ||
|
||
Parameters | ||
---------- | ||
func : np.cumsum, np.cumprod, np.maximum.accumulate, np.minimum.accumulate | ||
values : np.ndarray | ||
Numpy array with the values (can be of any dtype that support the | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
operation). | ||
skipna : bool, default True | ||
Whether to skip NA. | ||
""" | ||
try: | ||
fill_value = { | ||
np.cumprod: 1, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is np.cumprod relevant? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm I guess we can remove it |
||
np.maximum.accumulate: np.iinfo(np.int64).min, | ||
np.cumsum: 0, | ||
mroeschke marked this conversation as resolved.
Show resolved
Hide resolved
|
||
np.minimum.accumulate: np.iinfo(np.int64).max, | ||
}[func] | ||
except KeyError: | ||
raise ValueError(f"No accumulation for {func} implemented on BaseMaskedArray") | ||
|
||
mask = isna(values) | ||
y = values.view("i8") | ||
y[mask] = fill_value | ||
|
||
if not skipna: | ||
mask = np.maximum.accumulate(mask) | ||
|
||
result = func(y) | ||
result[mask] = iNaT | ||
|
||
if values.dtype.kind in ["m", "M"]: | ||
return result.view(values.dtype.base) | ||
return result | ||
|
||
|
||
def cumsum(values: np.ndarray, *, skipna: bool = True): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep good point, this one works. The others aren't that easy... |
||
return _cum_func(np.cumsum, values, skipna=skipna) | ||
|
||
|
||
def cumprod(values: np.ndarray, *, skipna: bool = True): | ||
return _cum_func(np.cumprod, values, skipna=skipna) | ||
|
||
|
||
def cummin(values: np.ndarray, *, skipna: bool = True): | ||
return _cum_func(np.minimum.accumulate, values, skipna=skipna) | ||
|
||
|
||
def cummax(values: np.ndarray, *, skipna: bool = True): | ||
return _cum_func(np.maximum.accumulate, values, skipna=skipna) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -122,6 +122,7 @@ | |
isin, | ||
unique1d, | ||
) | ||
from pandas.core.array_algos import datetimelike_accumulations | ||
from pandas.core.arraylike import OpsMixin | ||
from pandas.core.arrays._mixins import ( | ||
NDArrayBackedExtensionArray, | ||
|
@@ -1380,25 +1381,15 @@ def _addsub_object_array(self, other: np.ndarray, op): | |
return result | ||
|
||
def _accumulate(self, name: str, *, skipna: bool = True, **kwargs): | ||
if name not in {"cummin", "cummax"}: | ||
raise TypeError(f"Accumulation {name} not supported for {type(self)}") | ||
|
||
if is_period_dtype(self.dtype): | ||
data = self | ||
else: | ||
# Incompatible types in assignment (expression has type | ||
# "ndarray[Any, Any]", variable has type "DatetimeLikeArrayMixin" | ||
data = self._ndarray.copy() # type: ignore[assignment] | ||
|
||
if name in {"cummin", "cummax"}: | ||
func = np.minimum.accumulate if name == "cummin" else np.maximum.accumulate | ||
result = cast(np.ndarray, nanops.na_accum_func(data, func, skipna=skipna)) | ||
|
||
# error: Unexpected keyword argument "freq" for | ||
# "_simple_new" of "NDArrayBacked" [call-arg] | ||
return type(self)._simple_new( | ||
result, freq=self.freq, dtype=self.dtype # type: ignore[call-arg] | ||
) | ||
op = getattr(datetimelike_accumulations, name) | ||
result = op(self.copy(), skipna=skipna, **kwargs) | ||
|
||
raise TypeError(f"Accumulation {name} not supported for {type(self)}") | ||
return type(self)._simple_new( | ||
result, freq=self.freq, dtype=self.dtype # type: ignore[call-arg] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i dont think retaining self.freq is going to be correct in general There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you elaborate when we should set freq to None? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pretty much anything that isn't a no-op should be not-freq-preserving. e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thx, makes sense. Should we retain the freq in cases where we only have one element or not retain in general? Not easy to hit btw :) But added a test now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. id just not-retain in general There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, that's how I implemented it right now |
||
) | ||
|
||
@unpack_zerodim_and_defer("__add__") | ||
def __add__(self, other): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1712,52 +1712,7 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: | |
}[accum_func] | ||
|
||
# We will be applying this function to block values | ||
if values.dtype.kind in ["m", "M"]: | ||
# GH#30460, GH#29058 | ||
# numpy 1.18 started sorting NaTs at the end instead of beginning, | ||
# so we need to work around to maintain backwards-consistency. | ||
orig_dtype = values.dtype | ||
|
||
# We need to define mask before masking NaTs | ||
mask = isna(values) | ||
|
||
y = values.view("i8") | ||
# Note: the accum_func comparison fails as an "is" comparison | ||
changed = accum_func == np.minimum.accumulate | ||
|
||
try: | ||
if changed: | ||
y[mask] = lib.i8max | ||
|
||
result = accum_func(y, axis=0) | ||
finally: | ||
if changed: | ||
# restore NaT elements | ||
y[mask] = iNaT | ||
|
||
if skipna: | ||
result[mask] = iNaT | ||
elif accum_func == np.minimum.accumulate: | ||
# Restore NaTs that we masked previously | ||
nz = (~np.asarray(mask)).nonzero()[0] | ||
if len(nz): | ||
# everything up to the first non-na entry stays NaT | ||
result[: nz[0]] = iNaT | ||
|
||
if isinstance(values.dtype, np.dtype): | ||
result = result.view(orig_dtype) | ||
else: | ||
# DatetimeArray/TimedeltaArray | ||
# TODO: have this case go through a DTA method? | ||
# For DatetimeTZDtype, view result as M8[ns] | ||
npdtype = orig_dtype if isinstance(orig_dtype, np.dtype) else "M8[ns]" | ||
# Item "type" of "Union[Type[ExtensionArray], Type[ndarray[Any, Any]]]" | ||
# has no attribute "_simple_new" | ||
result = type(values)._simple_new( # type: ignore[union-attr] | ||
result.view(npdtype), dtype=orig_dtype | ||
) | ||
|
||
elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): | ||
if skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe a comment/check that "mM" cases should not get here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (OK for follow-up, i can add this into my next "assorted" branch) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added an assert |
||
vals = values.copy() | ||
mask = isna(vals) | ||
vals[mask] = mask_a | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,12 +70,12 @@ def test_cummin_cummax(self, datetime_series, method): | |
[ | ||
"cummax", | ||
False, | ||
["NaT", "2 days", "2 days", "2 days", "2 days", "3 days"], | ||
["NaT", "NaT", "NaT", "NaT", "NaT", "NaT"], | ||
], | ||
[ | ||
"cummin", | ||
False, | ||
["NaT", "2 days", "2 days", "1 days", "1 days", "1 days"], | ||
["NaT", "NaT", "NaT", "NaT", "NaT", "NaT"], | ||
], | ||
], | ||
) | ||
|
@@ -91,6 +91,26 @@ def test_cummin_cummax_datetimelike(self, ts, method, skipna, exp_tdi): | |
result = getattr(ser, method)(skipna=skipna) | ||
tm.assert_series_equal(expected, result) | ||
|
||
@pytest.mark.parametrize( | ||
"func, exp", | ||
[ | ||
("cummin", pd.Period("2012-1-1", freq="D")), | ||
("cummax", pd.Period("2012-1-2", freq="D")), | ||
], | ||
) | ||
def test_cummin_cummax_period(self, func, exp): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for checking this |
||
# GH#28385 | ||
ser = pd.Series( | ||
[pd.Period("2012-1-1", freq="D"), pd.NaT, pd.Period("2012-1-2", freq="D")] | ||
) | ||
result = getattr(ser, func)(skipna=False) | ||
expected = pd.Series([pd.Period("2012-1-1", freq="D"), pd.NaT, pd.NaT]) | ||
tm.assert_series_equal(result, expected) | ||
|
||
result = getattr(ser, func)(skipna=True) | ||
expected = pd.Series([pd.Period("2012-1-1", freq="D"), pd.NaT, exp]) | ||
tm.assert_series_equal(result, expected) | ||
|
||
@pytest.mark.parametrize( | ||
"arg", | ||
[ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thx removed