From f84c520428a376dd8f93c0d88872733322057a6f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 22 Aug 2021 20:31:44 -0700 Subject: [PATCH 1/3] BUG: rolling with Int64 --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/window/rolling.py | 6 +++++- pandas/tests/window/test_dtypes.py | 9 +++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 205a49e7786a7..d7de6ab7a5c79 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -341,7 +341,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`) - Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) - Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`) -- +- Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 637b7c705d73c..844d29bc41827 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -318,7 +318,11 @@ def _prep_values(self, values: ArrayLike) -> np.ndarray: # GH #12373 : rolling functions error on float32 data # make sure the data is coerced to float64 try: - values = ensure_float64(values) + if hasattr(values, "to_numpy"): + # GH 43016: ExtensionArray + values = values.to_numpy(dtype=np.float64, na_value=np.nan) + else: + values = ensure_float64(values) except (ValueError, TypeError) as err: raise TypeError(f"cannot handle this type -> {values.dtype}") from err diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 7cd2bf4f1ca19..12c653ca3bf98 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -2,6 +2,7 @@ import pytest from pandas import ( + NA, DataFrame, Series, ) @@ -76,6 +77,14 @@ def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_peri tm.assert_almost_equal(result, expected) +def test_series_nullable_int(any_signed_int_ea_dtype): + # GH 43016 + s = Series([0, 1, NA], dtype=any_signed_int_ea_dtype) + result = s.rolling(2).mean() + expected = Series([np.nan, 0.5, np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( "method, expected_data, min_periods", [ From 57b0ceb26b38ab0a9295bb853fa5f7a053fe141e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 26 Aug 2021 15:55:34 -0700 Subject: [PATCH 2/3] Move logic into ensure_float64 --- pandas/_libs/algos_common_helper.pxi.in | 34 ++++++++++++++----------- pandas/core/window/rolling.py | 6 +---- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 64e8bdea4672c..c677845b3a96f 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -37,27 +37,27 @@ def ensure_object(object arr): {{py: # name, c_type, dtype -dtypes = [('float64', 'FLOAT64', 'float64'), - ('float32', 'FLOAT32', 'float32'), - ('int8', 'INT8', 'int8'), - ('int16', 'INT16', 'int16'), - ('int32', 'INT32', 'int32'), - ('int64', 'INT64', 'int64'), - ('uint8', 'UINT8', 'uint8'), - ('uint16', 'UINT16', 'uint16'), - ('uint32', 'UINT32', 'uint32'), - ('uint64', 'UINT64', 'uint64'), - # ('platform_int', 'INT', 'int_'), - # ('object', 'OBJECT', 'object_'), +dtypes = [('float64', 'FLOAT64', 'float64', 'nan'), + ('float32', 'FLOAT32', 'float32', 'nan'), + ('int8', 'INT8', 'int8', None), + ('int16', 'INT16', 'int16', None), + ('int32', 'INT32', 'int32', None), + ('int64', 'INT64', 'int64', None), + ('uint8', 'UINT8', 'uint8', None), + ('uint16', 'UINT16', 'uint16', None), + ('uint32', 'UINT32', 'uint32', None), + ('uint64', 'UINT64', 'uint64', None), + # ('platform_int', 'INT', 'int_', None), + # ('object', 'OBJECT', 'object_', None), ] def get_dispatch(dtypes): - for name, c_type, dtype in dtypes: - yield name, c_type, dtype + for name, c_type, dtype, na_val in dtypes: + yield name, c_type, dtype, na_val }} -{{for name, c_type, dtype in get_dispatch(dtypes)}} +{{for name, c_type, dtype, na_val in get_dispatch(dtypes)}} def ensure_{{name}}(object arr, copy=True): @@ -66,6 +66,10 @@ def ensure_{{name}}(object arr, copy=True): return arr else: return arr.astype(np.{{dtype}}, copy=copy) +{{if na_val == "nan"}} + elif hasattr(arr, "to_numpy"): + return arr.to_numpy(np.{{dtype}}, copy=copy, na_value=np.{{na_val}}) +{{endif}} else: return np.array(arr, dtype=np.{{dtype}}) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 238aa7f962c67..ab23b84a3b8c6 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -317,11 +317,7 @@ def _prep_values(self, values: ArrayLike) -> np.ndarray: # GH #12373 : rolling functions error on float32 data # make sure the data is coerced to float64 try: - if hasattr(values, "to_numpy"): - # GH 43016: ExtensionArray - values = values.to_numpy(dtype=np.float64, na_value=np.nan) - else: - values = ensure_float64(values) + values = ensure_float64(values) except (ValueError, TypeError) as err: raise TypeError(f"cannot handle this type -> {values.dtype}") from err From 77416cc76dbb64a5f2114fc06bf688d92b4e2245 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 30 Aug 2021 22:52:53 -0700 Subject: [PATCH 3/3] Use isinstance check instead --- pandas/_libs/algos_common_helper.pxi.in | 34 +++++++++++-------------- pandas/core/window/rolling.py | 6 ++++- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index c677845b3a96f..64e8bdea4672c 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -37,27 +37,27 @@ def ensure_object(object arr): {{py: # name, c_type, dtype -dtypes = [('float64', 'FLOAT64', 'float64', 'nan'), - ('float32', 'FLOAT32', 'float32', 'nan'), - ('int8', 'INT8', 'int8', None), - ('int16', 'INT16', 'int16', None), - ('int32', 'INT32', 'int32', None), - ('int64', 'INT64', 'int64', None), - ('uint8', 'UINT8', 'uint8', None), - ('uint16', 'UINT16', 'uint16', None), - ('uint32', 'UINT32', 'uint32', None), - ('uint64', 'UINT64', 'uint64', None), - # ('platform_int', 'INT', 'int_', None), - # ('object', 'OBJECT', 'object_', None), +dtypes = [('float64', 'FLOAT64', 'float64'), + ('float32', 'FLOAT32', 'float32'), + ('int8', 'INT8', 'int8'), + ('int16', 'INT16', 'int16'), + ('int32', 'INT32', 'int32'), + ('int64', 'INT64', 'int64'), + ('uint8', 'UINT8', 'uint8'), + ('uint16', 'UINT16', 'uint16'), + ('uint32', 'UINT32', 'uint32'), + ('uint64', 'UINT64', 'uint64'), + # ('platform_int', 'INT', 'int_'), + # ('object', 'OBJECT', 'object_'), ] def get_dispatch(dtypes): - for name, c_type, dtype, na_val in dtypes: - yield name, c_type, dtype, na_val + for name, c_type, dtype in dtypes: + yield name, c_type, dtype }} -{{for name, c_type, dtype, na_val in get_dispatch(dtypes)}} +{{for name, c_type, dtype in get_dispatch(dtypes)}} def ensure_{{name}}(object arr, copy=True): @@ -66,10 +66,6 @@ def ensure_{{name}}(object arr, copy=True): return arr else: return arr.astype(np.{{dtype}}, copy=copy) -{{if na_val == "nan"}} - elif hasattr(arr, "to_numpy"): - return arr.to_numpy(np.{{dtype}}, copy=copy, na_value=np.{{na_val}}) -{{endif}} else: return np.array(arr, dtype=np.{{dtype}}) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ab23b84a3b8c6..66ffc2600e88e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -50,6 +50,7 @@ from pandas.core.algorithms import factorize from pandas.core.apply import ResamplerWindowApply +from pandas.core.arrays import ExtensionArray from pandas.core.base import ( DataError, SelectionMixin, @@ -317,7 +318,10 @@ def _prep_values(self, values: ArrayLike) -> np.ndarray: # GH #12373 : rolling functions error on float32 data # make sure the data is coerced to float64 try: - values = ensure_float64(values) + if isinstance(values, ExtensionArray): + values = values.to_numpy(np.float64, na_value=np.nan) + else: + values = ensure_float64(values) except (ValueError, TypeError) as err: raise TypeError(f"cannot handle this type -> {values.dtype}") from err