Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
idx = pd.date_range('0300-01-01', '2000-01-01', unit='s')
ser = Series(np.ones(len(idx)), index=idx)
ser.resample('D').mean()
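Issue Description
Resampling a Series whose second-resolution DatetimeIndex spans dates outside the nanosecond-representable range raises OutOfBoundsDatetime, because `_adjust_dates_anchored` unconditionally casts the first and last timestamps to unit "ns":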
---------------------------------------------------------------------------
OverflowError Traceback (most recent call last)
File ~/pandas-dev/pandas/_libs/tslibs/timestamps.pyx:1073, in pandas._libs.tslibs.timestamps._Timestamp._as_creso()
1072 try:
-> 1073 value = convert_reso(self.value, self._creso, creso, round_ok=round_ok)
1074 except OverflowError as err:
File ~/pandas-dev/pandas/_libs/tslibs/np_datetime.pyx:614, in pandas._libs.tslibs.np_datetime.convert_reso()
613 # Note: caller is responsible for re-raising as OutOfBoundsTimedelta
--> 614 res_value = value * mult
615
OverflowError: value too large
The above exception was the direct cause of the following exception:
OutOfBoundsDatetime Traceback (most recent call last)
Cell In[16], line 1
----> 1 ser.resample('D').mean()
File ~/pandas-dev/pandas/core/series.py:5660, in Series.resample(self, rule, axis, closed, label, convention, kind, on, level, origin, offset, group_keys)
5645 @doc(NDFrame.resample, **_shared_doc_kwargs) # type: ignore[has-type]
5646 def resample(
5647 self,
(...)
5658 group_keys: bool | lib.NoDefault = no_default,
5659 ) -> Resampler:
-> 5660 return super().resample(
5661 rule=rule,
5662 axis=axis,
5663 closed=closed,
5664 label=label,
5665 convention=convention,
5666 kind=kind,
5667 on=on,
5668 level=level,
5669 origin=origin,
5670 offset=offset,
5671 group_keys=group_keys,
5672 )
File ~/pandas-dev/pandas/core/generic.py:8820, in NDFrame.resample(self, rule, axis, closed, label, convention, kind, on, level, origin, offset, group_keys)
8817 from pandas.core.resample import get_resampler
8819 axis = self._get_axis_number(axis)
-> 8820 return get_resampler(
8821 self,
8822 freq=rule,
8823 label=label,
8824 closed=closed,
8825 axis=axis,
8826 kind=kind,
8827 convention=convention,
8828 key=on,
8829 level=level,
8830 origin=origin,
8831 offset=offset,
8832 group_keys=group_keys,
8833 )
File ~/pandas-dev/pandas/core/resample.py:1517, in get_resampler(obj, kind, **kwds)
1513 """
1514 Create a TimeGrouper and return our resampler.
1515 """
1516 tg = TimeGrouper(**kwds)
-> 1517 return tg._get_resampler(obj, kind=kind)
File ~/pandas-dev/pandas/core/resample.py:1675, in TimeGrouper._get_resampler(self, obj, kind)
1673 ax = self.ax
1674 if isinstance(ax, DatetimeIndex):
-> 1675 return DatetimeIndexResampler(
1676 obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys
1677 )
1678 elif isinstance(ax, PeriodIndex) or kind == "period":
1679 return PeriodIndexResampler(
1680 obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys
1681 )
File ~/pandas-dev/pandas/core/resample.py:166, in Resampler.__init__(self, obj, groupby, axis, kind, group_keys, selection, **kwargs)
163 self.as_index = True
165 self.groupby._set_grouper(self._convert_obj(obj), sort=True)
--> 166 self.binner, self.grouper = self._get_binner()
167 self._selection = selection
168 if self.groupby.key is not None:
File ~/pandas-dev/pandas/core/resample.py:253, in Resampler._get_binner(self)
247 @final
248 def _get_binner(self):
249 """
250 Create the BinGrouper, assume that self.set_grouper(obj)
251 has already been called.
252 """
--> 253 binner, bins, binlabels = self._get_binner_for_time()
254 assert len(bins) == len(binlabels)
255 bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer)
File ~/pandas-dev/pandas/core/resample.py:1249, in DatetimeIndexResampler._get_binner_for_time(self)
1247 if self.kind == "period":
1248 return self.groupby._get_time_period_bins(self.ax)
-> 1249 return self.groupby._get_time_bins(self.ax)
File ~/pandas-dev/pandas/core/resample.py:1709, in TimeGrouper._get_time_bins(self, ax)
1706 binner = labels = DatetimeIndex(data=[], freq=self.freq, name=ax.name)
1707 return binner, [], labels
-> 1709 first, last = _get_timestamp_range_edges(
1710 ax.min(),
1711 ax.max(),
1712 self.freq,
1713 closed=self.closed,
1714 origin=self.origin,
1715 offset=self.offset,
1716 )
1717 # GH #12037
1718 # use first/last directly instead of call replace() on them
1719 # because replace() will swallow the nanosecond part
(...)
1722 # GH 25758: If DST lands at midnight (e.g. 'America/Havana'), user feedback
1723 # has noted that ambiguous=True provides the most sensible result
1724 binner = labels = date_range(
1725 freq=self.freq,
1726 start=first,
(...)
1731 nonexistent="shift_forward",
1732 ).as_unit(ax.unit)
File ~/pandas-dev/pandas/core/resample.py:1994, in _get_timestamp_range_edges(first, last, freq, closed, origin, offset)
1991 if isinstance(origin, Timestamp):
1992 origin = origin.tz_localize(None)
-> 1994 first, last = _adjust_dates_anchored(
1995 first, last, freq, closed=closed, origin=origin, offset=offset
1996 )
1997 if isinstance(freq, Day):
1998 first = first.tz_localize(index_tz)
File ~/pandas-dev/pandas/core/resample.py:2101, in _adjust_dates_anchored(first, last, freq, closed, origin, offset)
2088 def _adjust_dates_anchored(
2089 first: Timestamp,
2090 last: Timestamp,
(...)
2099 # To handle frequencies that are not multiple or divisible by a day we let
2100 # the possibility to define a fixed origin timestamp. See GH 31809
-> 2101 first = first.as_unit("ns")
2102 last = last.as_unit("ns")
2103 if offset is not None:
File ~/pandas-dev/pandas/_libs/tslibs/timestamps.pyx:1099, in pandas._libs.tslibs.timestamps._Timestamp.as_unit()
1097 reso = get_unit_from_dtype(dtype)
1098 try:
-> 1099 return self._as_creso(reso, round_ok=round_ok)
1100 except OverflowError as err:
1101 raise OutOfBoundsDatetime(
File ~/pandas-dev/pandas/_libs/tslibs/timestamps.pyx:1076, in pandas._libs.tslibs.timestamps._Timestamp._as_creso()
1074 except OverflowError as err:
1075 unit = npy_unit_to_abbrev(creso)
-> 1076 raise OutOfBoundsDatetime(
1077 f"Cannot cast {self} to unit='{unit}' without overflow."
1078 ) from err
OutOfBoundsDatetime: Cannot cast 300-01-01 00:00:00 to unit='ns' without overflow.
Expected Behavior
The resample should succeed without overflowing, returning something like:
0300-01-01 1.0
0300-01-02 1.0
0300-01-03 1.0
0300-01-04 1.0
0300-01-05 1.0
...
1999-12-28 1.0
1999-12-29 1.0
1999-12-30 1.0
1999-12-31 1.0
2000-01-01 1.0
Freq: D, Length: 109573, dtype: float64
Installed Versions
INSTALLED VERSIONS
commit : 9594c04
python : 3.8.16.final.0
python-bits : 64
OS : Linux
OS-release : 5.10.102.1-microsoft-standard-WSL2
Version : #1 SMP Wed Mar 2 00:30:59 UTC 2022
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_GB.UTF-8
LOCALE : en_GB.UTF-8
pandas : 2.0.0.dev0+1336.g9594c04eb7
numpy : 1.23.5
pytz : 2022.7.1
dateutil : 2.8.2
setuptools : 66.1.1
pip : 22.3.1
Cython : 0.29.32
pytest : 7.2.1
hypothesis : 6.64.0
sphinx : 5.3.0
blosc : 1.11.1
feather : None
xlsxwriter : 3.0.7
lxml.etree : 4.9.2
html5lib : 1.1
pymysql : 1.0.2
psycopg2 : 2.9.5
jinja2 : 3.1.2
IPython : 8.8.0
pandas_datareader: None
bs4 : 4.11.1
bottleneck : 1.3.6
brotli :
fastparquet : 2023.1.0
fsspec : 2022.11.0
gcsfs : 2022.11.0
matplotlib : 3.6.3
numba : 0.56.4
numexpr : 2.8.4
odfpy : None
openpyxl : 3.0.10
pandas_gbq : None
pyarrow : 10.0.1
pyreadstat : 1.2.0
pyxlsb : 1.0.10
s3fs : 2022.11.0
scipy : 1.10.0
snappy :
sqlalchemy : 1.4.45
tables : 3.8.0
tabulate : 0.9.0
xarray : 2023.1.0
xlrd : 2.0.1
zstandard : 0.19.0
tzdata : 2022.7
qtpy : None
pyqt5 : None
None