Skip to content

Series shift method breaks for series of pandas Intervals in Pandas 1.0 (works in 0.25.3) #31495

Closed
@owenlamont

Description

@owenlamont

Code Sample, a copy-pastable example if possible

# Problem code example 1
import pandas as pd
test = pd.Series(index=[1, 2], data=[pd.Interval(pd.Timestamp("2020-09-04 10:00:00"), pd.Timestamp("2020-11-30 14:00:00")), pd.Interval(pd.Timestamp("2020-08-14 10:00:00"), pd.Timestamp("2020-09-21 14:00:00"))])
test.shift(1)

# Problem code example 2
import pandas as pd
test = pd.Series(index=[1, 2], data=[pd.Interval(1, 2), pd.Interval(3, 4)])
test.shift(1)

Problem description

Calling the `shift` method on an integer-indexed pandas Series of pandas Intervals raises opaque exceptions (a TypeError for intervals of Timestamps in example 1, a ValueError for intervals of integers in example 2). The same code works as expected in pandas 0.25.3. This is definitely a breaking change, but I'm unsure whether it is intentional; for now I'm assuming it should still behave as it did in 0.25.3. I searched for any documented change to the behaviour of `shift` but didn't find one.

Exception traceback for example 1:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-2-7a1f088b5a14> in <module>
      4 test = pd.Series(index=[1, 2], data=[pd.Interval(pd.Timestamp("2020-09-04 10:00:00"), pd.Timestamp("2020-11-30 14:00:00")),
      5                                      pd.Interval(pd.Timestamp("2020-08-14 10:00:00"), pd.Timestamp("2020-09-21 14:00:00"))])
----> 6 test.shift(1)

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\series.py in shift(self, periods, freq, axis, fill_value)
   4183     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
   4184         return super().shift(
-> 4185             periods=periods, freq=freq, axis=axis, fill_value=fill_value
   4186         )
   4187 

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\generic.py in shift(self, periods, freq, axis, fill_value)
   9043         if freq is None:
   9044             new_data = self._data.shift(
-> 9045                 periods=periods, axis=block_axis, fill_value=fill_value
   9046             )
   9047         else:

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\internals\managers.py in shift(self, **kwargs)
    571 
    572     def shift(self, **kwargs):
--> 573         return self.apply("shift", **kwargs)
    574 
    575     def fillna(self, **kwargs):

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, filter, **kwargs)
    440                 applied = b.apply(f, **kwargs)
    441             else:
--> 442                 applied = getattr(b, f)(**kwargs)
    443             result_blocks = _extend_blocks(applied, result_blocks)
    444 

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\internals\blocks.py in shift(self, periods, axis, fill_value)
   1908         return [
   1909             self.make_block_same_class(
-> 1910                 self.values.shift(periods=periods, fill_value=fill_value),
   1911                 placement=self.mgr_locs,
   1912                 ndim=self.ndim,

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\arrays\base.py in shift(self, periods, fill_value)
    623 
    624         empty = self._from_sequence(
--> 625             [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
    626         )
    627         if periods > 0:

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\arrays\interval.py in _from_sequence(cls, scalars, dtype, copy)
    243     @classmethod
    244     def _from_sequence(cls, scalars, dtype=None, copy=False):
--> 245         return cls(scalars, dtype=dtype, copy=copy)
    246 
    247     @classmethod

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\arrays\interval.py in __new__(cls, data, closed, dtype, copy, verify_integrity)
    182             copy=copy,
    183             dtype=dtype,
--> 184             verify_integrity=verify_integrity,
    185         )
    186 

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\arrays\interval.py in _simple_new(cls, left, right, closed, copy, dtype, verify_integrity)
    202                 raise TypeError(msg)
    203             elif dtype.subtype is not None:
--> 204                 left = left.astype(dtype.subtype)
    205                 right = right.astype(dtype.subtype)
    206 

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\indexes\numeric.py in astype(self, dtype, copy)
    393         if needs_i8_conversion(dtype):
    394             raise TypeError(
--> 395                 f"Cannot convert Float64Index to dtype {dtype}; integer "
    396                 "values are required for conversion"
    397             )

TypeError: Cannot convert Float64Index to dtype datetime64[ns]; integer values are required for conversion

Exception traceback for example 2:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-3-6804e066ee18> in <module>
      4 test = pd.Series(index=[1, 2], data=[pd.Interval(1, 2),
      5                                      pd.Interval(3, 4)])
----> 6 test.shift(1)

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\series.py in shift(self, periods, freq, axis, fill_value)
   4183     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
   4184         return super().shift(
-> 4185             periods=periods, freq=freq, axis=axis, fill_value=fill_value
   4186         )
   4187 

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\generic.py in shift(self, periods, freq, axis, fill_value)
   9043         if freq is None:
   9044             new_data = self._data.shift(
-> 9045                 periods=periods, axis=block_axis, fill_value=fill_value
   9046             )
   9047         else:

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\internals\managers.py in shift(self, **kwargs)
    571 
    572     def shift(self, **kwargs):
--> 573         return self.apply("shift", **kwargs)
    574 
    575     def fillna(self, **kwargs):

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, filter, **kwargs)
    440                 applied = b.apply(f, **kwargs)
    441             else:
--> 442                 applied = getattr(b, f)(**kwargs)
    443             result_blocks = _extend_blocks(applied, result_blocks)
    444 

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\internals\blocks.py in shift(self, periods, axis, fill_value)
   1908         return [
   1909             self.make_block_same_class(
-> 1910                 self.values.shift(periods=periods, fill_value=fill_value),
   1911                 placement=self.mgr_locs,
   1912                 ndim=self.ndim,

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\arrays\base.py in shift(self, periods, fill_value)
    623 
    624         empty = self._from_sequence(
--> 625             [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
    626         )
    627         if periods > 0:

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\arrays\interval.py in _from_sequence(cls, scalars, dtype, copy)
    243     @classmethod
    244     def _from_sequence(cls, scalars, dtype=None, copy=False):
--> 245         return cls(scalars, dtype=dtype, copy=copy)
    246 
    247     @classmethod

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\arrays\interval.py in __new__(cls, data, closed, dtype, copy, verify_integrity)
    182             copy=copy,
    183             dtype=dtype,
--> 184             verify_integrity=verify_integrity,
    185         )
    186 

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\arrays\interval.py in _simple_new(cls, left, right, closed, copy, dtype, verify_integrity)
    202                 raise TypeError(msg)
    203             elif dtype.subtype is not None:
--> 204                 left = left.astype(dtype.subtype)
    205                 right = right.astype(dtype.subtype)
    206 

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\indexes\numeric.py in astype(self, dtype, copy)
    399             # TODO(jreback); this can change once we have an EA Index type
    400             # GH 13149
--> 401             arr = astype_nansafe(self.values, dtype=dtype)
    402             return Int64Index(arr)
    403         return super().astype(dtype, copy=copy)

~\Miniconda3\envs\jupyter\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
    866 
    867         if not np.isfinite(arr).all():
--> 868             raise ValueError("Cannot convert non-finite values (NA or inf) to integer")
    869 
    870     elif is_object_dtype(arr):

ValueError: Cannot convert non-finite values (NA or inf) to integer

Expected Output

This is the actual output I got when executing the two examples above with pandas 0.25.3:

1 NaN
2 (2020-09-04 10:00:00, 2020-11-30 14:00:00]
dtype: object

1 NaN
2 (1, 2]
dtype: object

Output of pd.show_versions()

commit : None
python : 3.7.3.final.0
python-bits : 64
OS : Windows
OS-release : 10
machine : AMD64
processor : Intel64 Family 6 Model 158 Stepping 13, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.None

pandas : 1.0.0
numpy : 1.17.5
pytz : 2019.3
dateutil : 2.8.1
pip : 20.0.2
setuptools : 45.1.0.post20200119
Cython : None
pytest : 5.3.5
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : 1.2.7
lxml.etree : 4.5.0
html5lib : 1.0.1
pymysql : None
psycopg2 : None
jinja2 : 2.11.1
IPython : 7.11.1
pandas_datareader: None
bs4 : 4.8.2
bottleneck : 1.3.1
fastparquet : 0.3.2
gcsfs : None
lxml.etree : 4.5.0
matplotlib : 3.1.2
numexpr : None
odfpy : None
openpyxl : 3.0.3
pandas_gbq : None
pyarrow : 0.15.1
pytables : None
pytest : 5.3.5
pyxlsb : None
s3fs : 0.2.2
scipy : 1.3.1
sqlalchemy : 1.3.13
tables : None
tabulate : 0.8.6
xarray : 0.14.1
xlrd : 1.2.0
xlwt : None
xlsxwriter : 1.2.7
numba : 0.48.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    Algos — Non-arithmetic algos: value_counts, factorize, sorting, isin, clip, shift, diff
    ExtensionArray — Extending pandas with custom dtypes or arrays.
    Interval — Interval data type
    Missing-data — np.nan, pd.NaT, pd.NA, dropna, isnull, interpolate
    Regression — Functionality that used to work in a prior pandas version

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions