From 3da472fdd7765cb45734b9f7fdd2630b143682b8 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 17 Nov 2020 20:12:55 -0800 Subject: [PATCH 1/3] BUG: loc.setitem with expansion expanding rows --- pandas/core/indexing.py | 7 +++++++ pandas/tests/indexing/test_loc.py | 9 ++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 62b1554246e26..3c60753b5240e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1684,6 +1684,13 @@ def _setitem_with_indexer_split_path(self, indexer, value): for loc, v in zip(ilocs, value): self._setitem_single_column(loc, v, pi) + elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0: + # This is a setitem-with-expansion, see + # test_loc_setitem_empty_append_expands_rows + # e.g. df = DataFrame(columns=["x", "y"]) + # df.loc[:, "x"] = [1, 2, 3] + self._setitem_single_column(ilocs[0], value, pi) + else: raise ValueError( "Must have equal len keys and value " diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 28846bcf2f14d..8170b10e9ad6e 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -942,7 +942,7 @@ def test_loc_uint64(self): result = s.loc[[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]] tm.assert_series_equal(result, s) - def test_loc_setitem_empty_append(self): + def test_loc_setitem_empty_append_expands_rows(self): # GH6173, various appends to an empty dataframe data = [1, 2, 3] @@ -953,6 +953,13 @@ def test_loc_setitem_empty_append(self): df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) + # same thing, but with mixed dtypes + df = DataFrame(columns=["x", "y"]) + df["x"] = df["x"].astype(np.int64) + df.loc[:, "x"] = data + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_append_single_value(self): # only appends one value expected = DataFrame({"x": [1.0], "y": [np.nan]}) df = DataFrame(columns=["x", "y"], dtype=float) From 
2fb1469de8814f5c5a2cd1fd0cd8f1391f267041 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 18 Nov 2020 08:52:40 -0800 Subject: [PATCH 2/3] split test --- pandas/core/indexing.py | 3 ++- pandas/tests/indexing/test_loc.py | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3c60753b5240e..1792d8aaedef4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1686,8 +1686,9 @@ def _setitem_with_indexer_split_path(self, indexer, value): elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0: # This is a setitem-with-expansion, see - # test_loc_setitem_empty_append_expands_rows + # test_loc_setitem_empty_append_expands_rows_mixed_dtype # e.g. df = DataFrame(columns=["x", "y"]) + # df["x"] = df["x"].astype(np.int64) # df.loc[:, "x"] = [1, 2, 3] self._setitem_single_column(ilocs[0], value, pi) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8170b10e9ad6e..da0401a0165c2 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -953,7 +953,12 @@ def test_loc_setitem_empty_append_expands_rows(self): df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) - # same thing, but with mixed dtypes + def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): + # GH#37932 same as test_loc_setitem_empty_append_expands_rows + # but with mixed dtype so we go through take_split_path + data = [1, 2, 3] + expected = DataFrame({"x": data, "y": [None] * len(data)}) + df = DataFrame(columns=["x", "y"]) df["x"] = df["x"].astype(np.int64) df.loc[:, "x"] = data From 0e7e567f010a6e30983c5046b646a899ac4d88c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 26 Nov 2020 13:06:27 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 08edc7531bcd6..e875153626b5a 100644 --- 
a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -622,6 +622,7 @@ Indexing - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when the index was of ``object`` dtype and the given numeric label was in the index (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from a :class:`MultiIndex` (:issue:`27104`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`) +- Bug in :meth:`DataFrame.loc.__setitem__` raising ``ValueError`` when expanding an empty :class:`DataFrame` with mixed dtypes (:issue:`37932`) - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) - Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty DataFrame with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)