From cce39ffaf44e61d90f7693606400122e13635efe Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Sun, 21 May 2023 23:51:57 +0200 Subject: [PATCH 01/10] BUG: Rows are not inserted into DataFrame from lists of one element in v2.0 #52825 --- pandas/core/indexing.py | 8 ++++++++ pandas/tests/indexing/test_indexing.py | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 38bf6c34bf9c9..1f5aa8a07b660 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2024,10 +2024,18 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and len(pi) == 0 + is_empty_df_setter = com.is_null_slice(pi) and self.obj._mgr.arrays[0].size == 0 + if is_null_setter: # no-op, don't cast dtype later return + elif is_empty_df_setter: + # If we're setting a column to an empty df with null slice, + # we shouldn't do it inplace. + # GH#52825 + self.obj.isetitem(loc, value) + elif is_full_setter: try: self.obj._mgr.column_setitem( diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 21036598f46df..baf6fc5bf3d78 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -631,6 +631,18 @@ def test_index_type_coercion(self, indexer): indexer(s2)["0"] = 0 assert is_object_dtype(s2.index) + def test_setitem_one_element_list(self): + # GH#52825 + texts=['abc'] + languages=['en'] + df = pd.DataFrame(columns=[ + 'text', + 'language' + ]) + df.loc[:, 'text'] = texts + df.loc[:, 'language'] = languages + assert df.size == 2 + class TestMisc: def test_float_index_to_mixed(self): From 0e8ded5f41ee0889d063404e2938ce47cdb99213 Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Mon, 22 May 2023 00:15:23 +0200 Subject: [PATCH 02/10] fix pre-commit issue --- pandas/tests/indexing/test_indexing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index baf6fc5bf3d78..b9621cdb69f84 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -633,14 +633,14 @@ def test_index_type_coercion(self, indexer): def test_setitem_one_element_list(self): # GH#52825 - texts=['abc'] - languages=['en'] + texts=["abc"] + languages=["en"] df = pd.DataFrame(columns=[ - 'text', - 'language' + "text", + "language" ]) - df.loc[:, 'text'] = texts - df.loc[:, 'language'] = languages + df.loc[:, "text"] = texts + df.loc[:, "language"] = languages assert df.size == 2 From b6a32a29391c93b2ea22fddd0787b0d0f803349f Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Mon, 22 May 2023 00:21:16 +0200 Subject: [PATCH 03/10] fix pre-commit issue --- pandas/tests/indexing/test_indexing.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b9621cdb69f84..d6872910d083c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -633,12 +633,9 @@ def test_index_type_coercion(self, indexer): def test_setitem_one_element_list(self): # GH#52825 - texts=["abc"] - languages=["en"] - df = pd.DataFrame(columns=[ - "text", - "language" - ]) + texts = ["abc"] + languages = ["en"] + df = pd.DataFrame(columns=["text", "language"]) df.loc[:, "text"] = texts df.loc[:, "language"] = languages assert df.size == 2 From aff0060ff795ff334f2e0b8e97117b447d5c5886 Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Mon, 22 May 2023 00:30:08 +0200 Subject: [PATCH 04/10] fix inconsistent use of pandas namespace --- pandas/tests/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d6872910d083c..75a7cc1629a74 100644 --- a/pandas/tests/indexing/test_indexing.py +++ 
b/pandas/tests/indexing/test_indexing.py @@ -635,7 +635,7 @@ def test_setitem_one_element_list(self): # GH#52825 texts = ["abc"] languages = ["en"] - df = pd.DataFrame(columns=["text", "language"]) + df = DataFrame(columns=["text", "language"]) df.loc[:, "text"] = texts df.loc[:, "language"] = languages assert df.size == 2 From d38391bc6ad0f3d8fb27f1dacf04e75feedc758f Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Mon, 22 May 2023 22:35:16 +0200 Subject: [PATCH 05/10] revert test_loc_setitem_consistency_empty test case after #49775 --- pandas/tests/indexing/test_indexing.py | 10 +++++----- pandas/tests/indexing/test_loc.py | 5 +---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 75a7cc1629a74..6c49d6208c8b5 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -633,11 +633,11 @@ def test_index_type_coercion(self, indexer): def test_setitem_one_element_list(self): # GH#52825 - texts = ["abc"] - languages = ["en"] - df = DataFrame(columns=["text", "language"]) - df.loc[:, "text"] = texts - df.loc[:, "language"] = languages + x_values = ["x_value"] + y_values = ["y_values"] + df = DataFrame(columns=["x", "y"]) + df.loc[:, "x"] = x_values + df.loc[:, "y"] = y_values assert df.size == 2 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 19421345087fc..ddd70f4f68c39 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -614,18 +614,15 @@ def test_loc_setitem_consistency_single_row(self): def test_loc_setitem_consistency_empty(self): # empty (essentially noops) - # before the enforcement of #45333 in 2.0, the loc.setitem here would - # change the dtype of df.x to int64 expected = DataFrame(columns=["x", "y"]) + expected["x"] = expected["x"].astype(np.int64) df = DataFrame(columns=["x", "y"]) with tm.assert_produces_warning(None): df.loc[:, "x"] = 1 
tm.assert_frame_equal(df, expected) - # setting with setitem swaps in a new array, so changes the dtype df = DataFrame(columns=["x", "y"]) df["x"] = 1 - expected["x"] = expected["x"].astype(np.int64) tm.assert_frame_equal(df, expected) def test_loc_setitem_consistency_slice_column_len(self): From 8c6859bf5b0bc16a7d91c30e0b1e8c6d181b3b31 Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Tue, 23 May 2023 00:41:27 +0200 Subject: [PATCH 06/10] update documentation --- doc/source/whatsnew/v2.0.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 52d2730195a56..33c3c900d29e7 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -19,6 +19,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`) +- Fixed regression in :meth:`DataFrame.loc` Rows are not inserted into DataFrame from lists of one element (:issue:`52825`) .. --------------------------------------------------------------------------- .. 
_whatsnew_202.bug_fixes: From 7902a291aa27a7a1897293fec31a5fbceb4f829d Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Tue, 23 May 2023 07:47:49 +0200 Subject: [PATCH 07/10] lines in whatsnew should be sorted alphabetically --- doc/source/whatsnew/v2.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 33c3c900d29e7..e9c4f1cd305cf 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -17,9 +17,9 @@ Fixed regressions - Fixed regression in :func:`merge` on Windows when dtype is ``np.intc`` (:issue:`52451`) - Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`) - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) +- Fixed regression in :meth:`DataFrame.loc` rows are not inserted into DataFrame from lists of one element (:issue:`52825`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`) -- Fixed regression in :meth:`DataFrame.loc` Rows are not inserted into DataFrame from lists of one element (:issue:`52825`) .. --------------------------------------------------------------------------- .. 
_whatsnew_202.bug_fixes: From 2118e1b8ba50ae92c417846b62917eb87fb117f6 Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Tue, 23 May 2023 11:16:36 +0200 Subject: [PATCH 08/10] fix trailing space --- doc/source/whatsnew/v2.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index e9c4f1cd305cf..7ac9aa35653a8 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -17,7 +17,7 @@ Fixed regressions - Fixed regression in :func:`merge` on Windows when dtype is ``np.intc`` (:issue:`52451`) - Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`) - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) -- Fixed regression in :meth:`DataFrame.loc` rows are not inserted into DataFrame from lists of one element (:issue:`52825`) +- Fixed regression in :meth:`DataFrame.loc` rows are not inserted into DataFrame from lists of one element (:issue:`52825`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`) From d0c4a1b4006d418f3c04f89cb8b058673838a5fe Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Wed, 24 May 2023 23:14:22 +0200 Subject: [PATCH 09/10] fixes according to reviewer's comments --- doc/source/whatsnew/v2.0.2.rst | 2 +- pandas/core/indexing.py | 2 +- pandas/tests/indexing/test_indexing.py | 6 ++++-- pandas/tests/indexing/test_loc.py | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 7ac9aa35653a8..04ceb285ba088 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -17,7 +17,7 @@ Fixed regressions - Fixed 
regression in :func:`merge` on Windows when dtype is ``np.intc`` (:issue:`52451`) - Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`) - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) -- Fixed regression in :meth:`DataFrame.loc` rows are not inserted into DataFrame from lists of one element (:issue:`52825`) +- Fixed regression in :meth:`DataFrame.loc` when the DataFrame is empty and the item being set is a single-element list (:issue:`52825`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1f5aa8a07b660..505e07f8baa70 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2024,7 +2024,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and len(pi) == 0 - is_empty_df_setter = com.is_null_slice(pi) and self.obj._mgr.arrays[0].size == 0 + is_empty_df_setter = com.is_null_slice(pi) and len(self.obj) == 0 if is_null_setter: # no-op, don't cast dtype later diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 6c49d6208c8b5..6e829309e1630 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -634,11 +634,13 @@ def test_index_type_coercion(self, indexer): def test_setitem_one_element_list(self): # GH#52825 x_values = ["x_value"] - y_values = ["y_values"] + y_values = ["y_value"] df = DataFrame(columns=["x", "y"]) df.loc[:, "x"] = x_values df.loc[:, "y"] = y_values - assert df.size == 2 + + expected = DataFrame({"x": ["x_value"], "y": ["y_value"]}) + 
tm.assert_frame_equal(df, expected) class TestMisc: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index ddd70f4f68c39..68c55682f6ec7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -614,6 +614,7 @@ def test_loc_setitem_consistency_single_row(self): def test_loc_setitem_consistency_empty(self): # empty (essentially noops) + # the loc.setitem here changes the dtype of df.x to int64 expected = DataFrame(columns=["x", "y"]) expected["x"] = expected["x"].astype(np.int64) df = DataFrame(columns=["x", "y"]) From b3e84479f66661af04e4ef19981872beb6b4fc04 Mon Sep 17 00:00:00 2001 From: ikudrautsau Date: Tue, 30 May 2023 23:30:17 +0200 Subject: [PATCH 10/10] update v2.0.3 instead of v2.0.2 --- doc/source/whatsnew/v2.0.2.rst | 1 - doc/source/whatsnew/v2.0.3.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 35f23c941e0d3..559078d501a00 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -17,7 +17,6 @@ Fixed regressions - Fixed regression in :func:`merge` on Windows when dtype is ``np.intc`` (:issue:`52451`) - Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`) - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) -- Fixed regression in :meth:`DataFrame.loc` when the DataFrame is empty and the item being set is a single-element list (:issue:`52825`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 73779d7e4cc74..3c0a0a54f4908 100644 --- 
a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -13,7 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`DataFrame.loc` when the DataFrame is empty and the item being set is a single-element list (:issue:`52825`) .. --------------------------------------------------------------------------- .. _whatsnew_203.bug_fixes: