From 90dbf6cc388f7d7cea4b4038a0c97f179dcc7066 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 30 Sep 2021 15:54:17 +0300 Subject: [PATCH] Backport PR #43802: REG: Regression in explode when column is non string --- doc/source/whatsnew/v1.3.4.rst | 1 + pandas/core/frame.py | 7 +++---- pandas/tests/frame/methods/test_explode.py | 12 ++++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst index ccc475a74e13a..05667264ad9af 100644 --- a/doc/source/whatsnew/v1.3.4.rst +++ b/doc/source/whatsnew/v1.3.4.rst @@ -22,6 +22,7 @@ Fixed regressions - Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`) - Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`) - Fixed regression in :meth:`pandas.read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`) +- Fixed regression in :meth:`DataFrame.explode` raising ``AssertionError`` when ``column`` is any scalar which is not a string (:issue:`43314`) - Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 47c6faa725774..6da568698192d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8137,7 +8137,7 @@ def stack(self, level: Level = -1, dropna: bool = True): def explode( self, - column: str | tuple | list[str | tuple], + column: Scalar | tuple | list[Scalar | tuple], ignore_index: bool = False, ) -> DataFrame: """ @@ -8147,7 +8147,7 @@ def explode( Parameters ---------- - column : str or tuple or list thereof + column : Scalar or tuple or list thereof Column(s) to explode. For multiple columns, specify a non-empty list with each element be str or tuple, and all specified columns their list-like data @@ -8229,9 +8229,8 @@ def explode( if not self.columns.is_unique: raise ValueError("columns must be unique") - columns: list[str | tuple] + columns: list[Scalar | tuple] if is_scalar(column) or isinstance(column, tuple): - assert isinstance(column, (str, tuple)) columns = [column] elif isinstance(column, list) and all( map(lambda c: is_scalar(c) or isinstance(c, tuple), column) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 6fdf5d806ac6b..8716a181120f6 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -53,14 +53,18 @@ def test_error_multi_columns(input_subset, error_message): df.explode(input_subset) -def test_basic(): +@pytest.mark.parametrize( + "scalar", + ["a", 0, 1.5, pd.Timedelta("1 days"), pd.Timestamp("2019-12-31")], +) +def test_basic(scalar): df = pd.DataFrame( - {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} + {scalar: pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} ) - result = df.explode("A") + result = df.explode(scalar) expected = pd.DataFrame( { - "A": pd.Series( + scalar: pd.Series( [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object ), "B": 1,