From 27ea80777f5bf514d3a488c909a530f726e23552 Mon Sep 17 00:00:00 2001 From: Thomas Kastl Date: Tue, 3 Aug 2021 14:58:31 +0200 Subject: [PATCH 1/2] Add warning about NA merges --- .../getting_started/comparison/comparison_with_sql.rst | 7 +++++++ pandas/core/frame.py | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 49a21f87382b3..787aa48991ae3 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -233,6 +233,13 @@ default, :meth:`~pandas.DataFrame.join` will join the DataFrames on their indice parameters allowing you to specify the type of join to perform (``LEFT``, ``RIGHT``, ``INNER``, ``FULL``) or the columns to join on (column names or indices). +.. warning:: + + If both key columns contain rows where the key is ``NA`` or ``NaN``, those + rows will be matched against each other. This is different from usual SQL + join behaviour and can lead to unexpected results. Until this is + fixed make sure to sanitize your input dataframes! + .. ipython:: python df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)}) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 48b18a33f9c9f..75302256e21c1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -265,6 +265,13 @@ When performing a cross merge, no column specifications to merge on are allowed. +.. warning:: + + If both key columns contain rows where the key is ``NA`` or ``NaN``, those + rows will be matched against each other. This is different from usual SQL + join behaviour and can lead to unexpected results. Until this is + fixed make sure to sanitize your input dataframes! + Parameters ----------%s right : DataFrame or named Series From fc4916bb4f709c36b81189741ff82f1ec950fe03 Mon Sep 17 00:00:00 2001 From: Thomas Kastl Date: Fri, 6 Aug 2021 15:50:30 +0200 Subject: [PATCH 2/2] Add changes recommended in PR --- .../getting_started/comparison/comparison_with_sql.rst | 5 ++--- pandas/core/frame.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 787aa48991ae3..0596687b4d15e 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -235,10 +235,9 @@ parameters allowing you to specify the type of join to perform (``LEFT``, ``RIGH .. warning:: - If both key columns contain rows where the key is ``NA`` or ``NaN``, those + If both key columns contain rows where the key is a null value, those rows will be matched against each other. This is different from usual SQL - join behaviour and can lead to unexpected results. Until this is - fixed make sure to sanitize your input dataframes! + join behaviour and can lead to unexpected results. .. ipython:: python diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 75302256e21c1..cfb9740f8c988 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -267,10 +267,9 @@ .. warning:: - If both key columns contain rows where the key is ``NA`` or ``NaN``, those + If both key columns contain rows where the key is a null value, those rows will be matched against each other. This is different from usual SQL - join behaviour and can lead to unexpected results. Until this is - fixed make sure to sanitize your input dataframes! + join behaviour and can lead to unexpected results. Parameters ----------%s