From 18eaed00826c82deaa190f96ceccba5b16bf6e50 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 4 Feb 2023 18:56:40 -0800 Subject: [PATCH] TYP: resample --- pandas/core/generic.py | 2 +- pandas/core/resample.py | 90 ++++++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 34 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6e893c46acfdb..6008e6b6cb566 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8783,7 +8783,7 @@ def resample( axis = self._get_axis_number(axis) return get_resampler( - self, + cast("Series | DataFrame", self), freq=rule, label=label, closed=closed, diff --git a/pandas/core/resample.py b/pandas/core/resample.py index bb716628114ea..52ccef0f5b602 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -129,6 +129,7 @@ class Resampler(BaseGroupBy, PandasObject): grouper: BinGrouper _timegrouper: TimeGrouper + binner: DatetimeIndex | TimedeltaIndex | PeriodIndex # depends on subclass exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat # to the groupby descriptor @@ -145,15 +146,15 @@ class Resampler(BaseGroupBy, PandasObject): def __init__( self, - obj: DataFrame | Series, - groupby: TimeGrouper, + obj: NDFrame, + timegrouper: TimeGrouper, axis: Axis = 0, kind=None, *, group_keys: bool | lib.NoDefault = lib.no_default, selection=None, ) -> None: - self._timegrouper = groupby + self._timegrouper = timegrouper self.keys = None self.sort = True # error: Incompatible types in assignment (expression has type "Union @@ -463,7 +464,7 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): return self._wrap_result(result) - def _get_resampler_for_grouping(self, groupby, key=None): + def _get_resampler_for_grouping(self, groupby: GroupBy, key): """ Return the correct class for resampling with groupby. """ @@ -1142,18 +1143,29 @@ class _GroupByMixin(PandasObject): _attributes: list[str] # in practice the same as Resampler._attributes _selection: IndexLabel | None = None + _groupby: GroupBy + _timegrouper: TimeGrouper - def __init__(self, *, parent: Resampler, groupby=None, key=None, **kwargs) -> None: + def __init__( + self, + *, + parent: Resampler, + groupby: GroupBy, + key=None, + selection: IndexLabel | None = None, + ) -> None: # reached via ._gotitem and _get_resampler_for_grouping + assert isinstance(groupby, GroupBy), type(groupby) + # parent is always a Resampler, sometimes a _GroupByMixin assert isinstance(parent, Resampler), type(parent) # initialize our GroupByMixin object with # the resampler attributes for attr in self._attributes: - setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) - self._selection = kwargs.get("selection") + setattr(self, attr, getattr(parent, attr)) + self._selection = selection self.binner = parent.binner self.key = key @@ -1169,7 +1181,7 @@ def _apply(self, f, *args, **kwargs): """ def func(x): - x = self._resampler_cls(x, groupby=self._timegrouper) + x = self._resampler_cls(x, timegrouper=self._timegrouper) if isinstance(f, str): return getattr(x, f)(**kwargs) @@ -1201,10 +1213,6 @@ def _gotitem(self, key, ndim, subset=None): # error: "GotItemMixin" has no attribute "obj" subset = self.obj # type: ignore[attr-defined] - # we need to make a shallow copy of ourselves - # with the same groupby - kwargs = {attr: getattr(self, attr) for attr in self._attributes} - # Try to select from a DataFrame, falling back to a Series try: if isinstance(key, list) and self.key not in key and self.key is not None: @@ -1223,7 +1231,6 @@ def _gotitem(self, key, ndim, subset=None): groupby=groupby, parent=cast(Resampler, self), selection=selection, - **kwargs, ) return new_rs @@ -1499,9 +1506,7 @@ def _resampler_cls(self): return TimedeltaIndexResampler -def get_resampler( - obj, kind=None, **kwds -) -> DatetimeIndexResampler | PeriodIndexResampler | TimedeltaIndexResampler: +def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler: """ Create a TimeGrouper and return our resampler. """ @@ -1513,8 +1518,15 @@ def get_resampler( def get_resampler_for_grouping( - groupby, rule, how=None, fill_method=None, limit=None, kind=None, on=None, **kwargs -): + groupby: GroupBy, + rule, + how=None, + fill_method=None, + limit=None, + kind=None, + on=None, + **kwargs, +) -> Resampler: """ Return our appropriate resampler when grouping as well. """ @@ -1641,19 +1653,19 @@ def __init__( super().__init__(freq=freq, axis=axis, **kwargs) - def _get_resampler(self, obj, kind=None): + def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: """ Return my resampler or raise if we have an invalid axis. Parameters ---------- - obj : input object + obj : Series or DataFrame kind : string, optional 'period','timestamp','timedelta' are valid Returns ------- - a Resampler + Resampler Raises ------ @@ -1665,15 +1677,23 @@ def _get_resampler(self, obj, kind=None): ax = self.ax if isinstance(ax, DatetimeIndex): return DatetimeIndexResampler( - obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys + obj, + timegrouper=self, + kind=kind, + axis=self.axis, + group_keys=self.group_keys, ) elif isinstance(ax, PeriodIndex) or kind == "period": return PeriodIndexResampler( - obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys + obj, + timegrouper=self, + kind=kind, + axis=self.axis, + group_keys=self.group_keys, ) elif isinstance(ax, TimedeltaIndex): return TimedeltaIndexResampler( - obj, groupby=self, axis=self.axis, group_keys=self.group_keys + obj, timegrouper=self, axis=self.axis, group_keys=self.group_keys ) raise TypeError( @@ -1682,10 +1702,12 @@ def _get_resampler(self, obj, kind=None): f"but got an instance of '{type(ax).__name__}'" ) - def _get_grouper(self, obj, validate: bool = True): + def _get_grouper( + self, obj: NDFrameT, validate: bool = True + ) -> tuple[BinGrouper, NDFrameT]: # create the resampler and return our binner r = self._get_resampler(obj) - return r.grouper, r.obj + return r.grouper, cast(NDFrameT, r.obj) def _get_time_bins(self, ax: DatetimeIndex): if not isinstance(ax, DatetimeIndex): @@ -1750,19 +1772,21 @@ def _get_time_bins(self, ax: DatetimeIndex): return binner, bins, labels - def _adjust_bin_edges(self, binner, ax_values): + def _adjust_bin_edges( + self, binner: DatetimeIndex, ax_values: npt.NDArray[np.int64] + ) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]: # Some hacks for > daily data, see #1471, #1458, #1483 if self.freq != "D" and is_superperiod(self.freq, "D"): if self.closed == "right": # GH 21459, GH 9119: Adjust the bins relative to the wall time - bin_edges = binner.tz_localize(None) - bin_edges = ( - bin_edges - + Timedelta(days=1, unit=bin_edges.unit).as_unit(bin_edges.unit) - - Timedelta(1, unit=bin_edges.unit).as_unit(bin_edges.unit) + edges_dti = binner.tz_localize(None) + edges_dti = ( + edges_dti + + Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit) + - Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit) ) - bin_edges = bin_edges.tz_localize(binner.tz).asi8 + bin_edges = edges_dti.tz_localize(binner.tz).asi8 else: bin_edges = binner.asi8