Skip to content

TYP: resample #51178

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8783,7 +8783,7 @@ def resample(

axis = self._get_axis_number(axis)
return get_resampler(
self,
cast("Series | DataFrame", self),
freq=rule,
label=label,
closed=closed,
Expand Down
90 changes: 57 additions & 33 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class Resampler(BaseGroupBy, PandasObject):

grouper: BinGrouper
_timegrouper: TimeGrouper
binner: DatetimeIndex | TimedeltaIndex | PeriodIndex # depends on subclass
exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat

# to the groupby descriptor
Expand All @@ -145,15 +146,15 @@ class Resampler(BaseGroupBy, PandasObject):

def __init__(
self,
obj: DataFrame | Series,
groupby: TimeGrouper,
obj: NDFrame,
timegrouper: TimeGrouper,
axis: Axis = 0,
kind=None,
*,
group_keys: bool | lib.NoDefault = lib.no_default,
selection=None,
) -> None:
self._timegrouper = groupby
self._timegrouper = timegrouper
self.keys = None
self.sort = True
# error: Incompatible types in assignment (expression has type "Union
Expand Down Expand Up @@ -463,7 +464,7 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):

return self._wrap_result(result)

def _get_resampler_for_grouping(self, groupby, key=None):
def _get_resampler_for_grouping(self, groupby: GroupBy, key):
"""
Return the correct class for resampling with groupby.
"""
Expand Down Expand Up @@ -1142,18 +1143,29 @@ class _GroupByMixin(PandasObject):

_attributes: list[str] # in practice the same as Resampler._attributes
_selection: IndexLabel | None = None
_groupby: GroupBy
_timegrouper: TimeGrouper

def __init__(self, *, parent: Resampler, groupby=None, key=None, **kwargs) -> None:
def __init__(
self,
*,
parent: Resampler,
groupby: GroupBy,
key=None,
selection: IndexLabel | None = None,
) -> None:
# reached via ._gotitem and _get_resampler_for_grouping

assert isinstance(groupby, GroupBy), type(groupby)

# parent is always a Resampler, sometimes a _GroupByMixin
assert isinstance(parent, Resampler), type(parent)

# initialize our GroupByMixin object with
# the resampler attributes
for attr in self._attributes:
setattr(self, attr, kwargs.get(attr, getattr(parent, attr)))
self._selection = kwargs.get("selection")
setattr(self, attr, getattr(parent, attr))
self._selection = selection

self.binner = parent.binner
self.key = key
Expand All @@ -1169,7 +1181,7 @@ def _apply(self, f, *args, **kwargs):
"""

def func(x):
x = self._resampler_cls(x, groupby=self._timegrouper)
x = self._resampler_cls(x, timegrouper=self._timegrouper)

if isinstance(f, str):
return getattr(x, f)(**kwargs)
Expand Down Expand Up @@ -1201,10 +1213,6 @@ def _gotitem(self, key, ndim, subset=None):
# error: "GotItemMixin" has no attribute "obj"
subset = self.obj # type: ignore[attr-defined]

# we need to make a shallow copy of ourselves
# with the same groupby
kwargs = {attr: getattr(self, attr) for attr in self._attributes}

# Try to select from a DataFrame, falling back to a Series
try:
if isinstance(key, list) and self.key not in key and self.key is not None:
Expand All @@ -1223,7 +1231,6 @@ def _gotitem(self, key, ndim, subset=None):
groupby=groupby,
parent=cast(Resampler, self),
selection=selection,
**kwargs,
)
return new_rs

Expand Down Expand Up @@ -1499,9 +1506,7 @@ def _resampler_cls(self):
return TimedeltaIndexResampler


def get_resampler(
obj, kind=None, **kwds
) -> DatetimeIndexResampler | PeriodIndexResampler | TimedeltaIndexResampler:
def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler:
"""
Create a TimeGrouper and return our resampler.
"""
Expand All @@ -1513,8 +1518,15 @@ def get_resampler(


def get_resampler_for_grouping(
groupby, rule, how=None, fill_method=None, limit=None, kind=None, on=None, **kwargs
):
groupby: GroupBy,
rule,
how=None,
fill_method=None,
limit=None,
kind=None,
on=None,
**kwargs,
) -> Resampler:
"""
Return our appropriate resampler when grouping as well.
"""
Expand Down Expand Up @@ -1641,19 +1653,19 @@ def __init__(

super().__init__(freq=freq, axis=axis, **kwargs)

def _get_resampler(self, obj, kind=None):
def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler:
"""
Return my resampler or raise if we have an invalid axis.

Parameters
----------
obj : input object
obj : Series or DataFrame
kind : string, optional
'period','timestamp','timedelta' are valid

Returns
-------
a Resampler
Resampler

Raises
------
Expand All @@ -1665,15 +1677,23 @@ def _get_resampler(self, obj, kind=None):
ax = self.ax
if isinstance(ax, DatetimeIndex):
return DatetimeIndexResampler(
obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys
obj,
timegrouper=self,
kind=kind,
axis=self.axis,
group_keys=self.group_keys,
)
elif isinstance(ax, PeriodIndex) or kind == "period":
return PeriodIndexResampler(
obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys
obj,
timegrouper=self,
kind=kind,
axis=self.axis,
group_keys=self.group_keys,
)
elif isinstance(ax, TimedeltaIndex):
return TimedeltaIndexResampler(
obj, groupby=self, axis=self.axis, group_keys=self.group_keys
obj, timegrouper=self, axis=self.axis, group_keys=self.group_keys
)

raise TypeError(
Expand All @@ -1682,10 +1702,12 @@ def _get_resampler(self, obj, kind=None):
f"but got an instance of '{type(ax).__name__}'"
)

def _get_grouper(self, obj, validate: bool = True):
def _get_grouper(
self, obj: NDFrameT, validate: bool = True
) -> tuple[BinGrouper, NDFrameT]:
# create the resampler and return our binner
r = self._get_resampler(obj)
return r.grouper, r.obj
return r.grouper, cast(NDFrameT, r.obj)

def _get_time_bins(self, ax: DatetimeIndex):
if not isinstance(ax, DatetimeIndex):
Expand Down Expand Up @@ -1750,19 +1772,21 @@ def _get_time_bins(self, ax: DatetimeIndex):

return binner, bins, labels

def _adjust_bin_edges(self, binner, ax_values):
def _adjust_bin_edges(
self, binner: DatetimeIndex, ax_values: npt.NDArray[np.int64]
) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]:
# Some hacks for > daily data, see #1471, #1458, #1483

if self.freq != "D" and is_superperiod(self.freq, "D"):
if self.closed == "right":
# GH 21459, GH 9119: Adjust the bins relative to the wall time
bin_edges = binner.tz_localize(None)
bin_edges = (
bin_edges
+ Timedelta(days=1, unit=bin_edges.unit).as_unit(bin_edges.unit)
- Timedelta(1, unit=bin_edges.unit).as_unit(bin_edges.unit)
edges_dti = binner.tz_localize(None)
edges_dti = (
edges_dti
+ Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit)
- Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit)
)
bin_edges = bin_edges.tz_localize(binner.tz).asi8
bin_edges = edges_dti.tz_localize(binner.tz).asi8
else:
bin_edges = binner.asi8

Expand Down