From cead7e6425efed249a61ad9abc431bf959b7795b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 08:32:33 -0800 Subject: [PATCH 1/2] REG: DataFrame.shift with axis=1 and CategoricalIndex columns --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/frame.py | 7 +++++-- pandas/tests/frame/methods/test_shift.py | 11 ++++++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 57dd1d05a274e..8bd4ffb152c32 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -195,7 +195,7 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`) - Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`) -- +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and :class:`CategoricalIndex` columns (:issue:`38434`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f79e459362d7b..1e045cfbb6936 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4584,20 +4584,23 @@ def shift( if axis == 1 and periods != 0 and fill_value is lib.no_default and ncols > 0: # We will infer fill_value to match the closest column + # Use a column that we know is valid for our column's dtype GH#38434 + label = self.columns[0] + if periods > 0: result = self.iloc[:, :-periods] for col in range(min(ncols, abs(periods))): # TODO(EA2D): doing this in a loop unnecessary with 2D EAs # Define filler inside loop so we get a copy filler = self.iloc[:, 0].shift(len(self)) - result.insert(0, col, filler, allow_duplicates=True) + result.insert(0, label, filler, allow_duplicates=True) else: result = self.iloc[:, -periods:] for col in range(min(ncols, abs(periods))): # Define filler inside loop so we get a copy filler = self.iloc[:, -1].shift(len(self)) result.insert( - len(result.columns), col, filler, allow_duplicates=True + len(result.columns), label, filler, allow_duplicates=True ) result.columns = self.columns.copy() diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 2e21ce8ec2256..c9bc14629c8bb 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series, date_range, offsets +from pandas import CategoricalIndex, DataFrame, Index, Series, date_range, offsets import pandas._testing as tm @@ -292,3 +292,12 @@ def test_shift_dt64values_int_fill_deprecated(self): expected = DataFrame({"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}) tm.assert_frame_equal(result, expected) + + def test_shift_axis1_categorical_columns(self): + # GH#38434 + ci = CategoricalIndex(["a", "b"]) + df = DataFrame([[1, 2], [3, 4]], index=ci, columns=ci) + result = df.shift(axis=1) + + expected = DataFrame({"a": [np.nan, np.nan], "b": [1, 3]}, index=ci, columns=ci) + tm.assert_frame_equal(result, expected) From 3c07dcc6b6336e6c75ed4d8f4f72ef27030357a4 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 15:45:08 -0800 Subject: [PATCH 2/2] test with periods=2, remove whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 - pandas/tests/frame/methods/test_shift.py | 19 ++++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 8bd4ffb152c32..d6dd1872c3e7e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -195,7 +195,6 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`) - Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`) -- Bug in :meth:`DataFrame.shift` with ``axis=1`` and :class:`CategoricalIndex` columns (:issue:`38434`) Conversion ^^^^^^^^^^ diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index c9bc14629c8bb..40b3f1e89c015 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -295,9 +295,22 @@ def test_shift_dt64values_int_fill_deprecated(self): def test_shift_axis1_categorical_columns(self): # GH#38434 - ci = CategoricalIndex(["a", "b"]) - df = DataFrame([[1, 2], [3, 4]], index=ci, columns=ci) + ci = CategoricalIndex(["a", "b", "c"]) + df = DataFrame( + {"a": [1, 3], "b": [2, 4], "c": [5, 6]}, index=ci[:-1], columns=ci + ) result = df.shift(axis=1) - expected = DataFrame({"a": [np.nan, np.nan], "b": [1, 3]}, index=ci, columns=ci) + expected = DataFrame( + {"a": [np.nan, np.nan], "b": [1, 3], "c": [2, 4]}, index=ci[:-1], columns=ci + ) + tm.assert_frame_equal(result, expected) + + # periods != 1 + result = df.shift(2, axis=1) + expected = DataFrame( + {"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 3]}, + index=ci[:-1], + columns=ci, + ) tm.assert_frame_equal(result, expected)