From 44c9a14014152c928dd79ca7e81d16bae03c1838 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 26 Jan 2025 10:56:15 +0200 Subject: [PATCH 1/5] Add test --- pandas/tests/frame/methods/test_combine_first.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index a70876b5a96ca..1e594043510ea 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -380,7 +380,7 @@ def test_combine_first_with_asymmetric_other(self, val): df2 = DataFrame({"isBool": [True]}) res = df1.combine_first(df2) - exp = DataFrame({"isBool": [True], "isNum": [val]}) + exp = DataFrame({"isNum": [val], "isBool": [True]}) tm.assert_frame_equal(res, exp) @@ -555,3 +555,13 @@ def test_combine_first_empty_columns(): result = left.combine_first(right) expected = DataFrame(columns=["a", "b", "c"]) tm.assert_frame_equal(result, expected) + + +def test_combine_first_preserve_column_order(): + # GH#60427 + df1 = DataFrame({"B": [1, 2, 3], "A": [4, None, 6]}) + df2 = DataFrame({"A": [5]}, index=[1]) + + result = df1.combine_first(df2) + expected = DataFrame({"B": [1, 2, 3], "A": [4.0, 5.0, 6.0]}) + tm.assert_frame_equal(result, expected) From e2f4afd7eed4c7a7b563e6765b014a22639c4048 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 26 Jan 2025 10:57:10 +0200 Subject: [PATCH 2/5] Fix combine_first reorders columns --- pandas/core/frame.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index af66bb54610f1..7e90e863f3c2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8806,7 +8806,10 @@ def combiner(x: Series, y: Series): ) combined = combined.astype(other.dtypes) else: + # preserve column order + new_columns = self.columns.union(other.columns, sort=False) combined = self.combine(other, combiner, overwrite=False) + combined = combined.reindex(columns=new_columns) dtypes = { col: find_common_type([self.dtypes[col], other.dtypes[col]]) From aefa296d1a465ac827c0fa78379673573a5e02a9 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 26 Jan 2025 11:03:33 +0200 Subject: [PATCH 3/5] Add whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1d8d0f6a74cb1..a7f63d75a047e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -758,6 +758,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) +- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) From 58230c95b92bd54bba949d6a8625cf0e522dcfc5 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 26 Jan 2025 20:38:51 +0200 Subject: [PATCH 4/5] Fix corner case when self is empty and future.infer_string is True --- pandas/core/frame.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7e90e863f3c2a..2cceda65abd8f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8672,6 +8672,9 @@ def combine( """ other_idxlen = len(other.index) # save for compare + # preserve column order + new_columns = self.columns.union(other.columns, sort=False) + this, other = self.align(other) new_index = this.index @@ -8681,8 +8684,6 @@ def combine( if self.empty and len(other) == other_idxlen: return other.copy() - # sorts if possible; otherwise align above ensures that these are set-equal - new_columns = this.columns.union(other.columns) do_fill = fill_value is not None result = {} for col in new_columns: @@ -8806,10 +8807,7 @@ def combiner(x: Series, y: Series): ) combined = combined.astype(other.dtypes) else: - # preserve column order - new_columns = self.columns.union(other.columns, sort=False) combined = self.combine(other, combiner, overwrite=False) - combined = combined.reindex(columns=new_columns) dtypes = { col: find_common_type([self.dtypes[col], other.dtypes[col]]) From cc61621a43c188f1ce847f750ea95496eaf7dbbf Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Mon, 27 Jan 2025 20:50:33 +0200 Subject: [PATCH 5/5] Update --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2cceda65abd8f..3669d8249dd27 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8671,9 +8671,7 @@ def combine( 2 NaN 3.0 1.0 """ other_idxlen = len(other.index) # save for compare - - # preserve column order - new_columns = self.columns.union(other.columns, sort=False) + other_columns = other.columns this, other = self.align(other) new_index = this.index @@ -8684,6 +8682,8 @@ def combine( if self.empty and len(other) == other_idxlen: return other.copy() + # preserve column order + new_columns = self.columns.union(other_columns, sort=False) do_fill = fill_value is not None result = {} for col in new_columns: