From 8266cdc3924d7166eacfb296d038b542c60ef569 Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Reyes Date: Mon, 1 Feb 2016 10:00:23 -0600 Subject: [PATCH 1/2] DOC: improves DataFrame.join documentation DOC: improves DataFrame.join documentation --- doc/source/merging.rst | 6 ++--- pandas/core/frame.py | 58 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 074b15bbbcb66..feb6e4834a754 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -558,10 +558,8 @@ DataFrame instance method, with the calling DataFrame being implicitly considered the left object in the join. The related ``DataFrame.join`` method, uses ``merge`` internally for the -index-on-index and index-on-column(s) joins, but *joins on indexes* by default -rather than trying to join on common columns (the default behavior for -``merge``). If you are joining on index, you may wish to use ``DataFrame.join`` -to save yourself some typing. +index-on-index (by default) and column(s)-on-index joins. If you are joining on +index only, you may wish to use ``DataFrame.join`` to save yourself some typing. Brief primer on merge methods (relational algebra) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 41a4cd0d77508..c2f04a3743c34 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4318,18 +4318,20 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame on : column name, tuple/list of column names, or array-like - Column(s) to use for joining, otherwise join on index. If multiples + Column(s) in the caller to join on the index in other, + otherwise joins index-on-index. If multiple columns given, the passed DataFrame must have a MultiIndex. 
Can pass an array as the join key if not already contained in the calling DataFrame. Like an Excel VLOOKUP operation how : {'left', 'right', 'outer', 'inner'} - How to handle indexes of the two objects. Default: 'left' - for joining on index, None otherwise - - * left: use calling frame's index - * right: use input frame's index - * outer: form union of indexes - * inner: use intersection of indexes + How to handle the operation of the two objects. Default: 'left' + + * left: use calling frame's index (or column if on is specified) + * right: use other frame's index + * outer: form union of calling frame's index (or column if on is + specified) with other frame's index + * inner: form intersection of calling frame's index (or column if + on is specified) with other frame's index lsuffix : string Suffix to use from left frame's overlapping columns rsuffix : string @@ -4343,6 +4345,46 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', on, lsuffix, and rsuffix options are not supported when passing a list of DataFrame objects + Examples + -------- + >>> caller = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], + ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) + + >>> caller + A key + 0 A0 K0 + 1 A1 K1 + 2 A2 K2 + 3 A3 K3 + 4 A4 K4 + 5 A5 K5 + + >>> other = pd.DataFrame({'key': ['K0', 'K1', 'K2'], + ... 'B': ['B0', 'B1', 'B2']}) + + >>> other + B key + 0 B0 K0 + 1 B1 K1 + 2 B2 K2 + + Perform a left join using caller's key column and other frame's index + + >>> caller.join(other.set_index('key'), on='key', how='left', + ... 
lsuffix='_l', rsuffix='_r') + + A key B + 0 A0 K0 B0 + 1 A1 K1 B1 + 2 A2 K2 B2 + 3 A3 K3 NaN + 4 A4 K4 NaN + 5 A5 K5 NaN + + See also + -------- + DataFrame.merge : For column(s)-on-column(s) operations + Returns ------- joined : DataFrame From a66f2ea3e26c349561838dbb6609ac5f9aabcfc8 Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Reyes Date: Wed, 25 May 2016 18:53:52 -0500 Subject: [PATCH 2/2] DOC: improves DataFrame.join documentation --- pandas/core/frame.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c2f04a3743c34..1ca0b4e395b3f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4368,10 +4368,40 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 1 B1 K1 2 B2 K2 - Perform a left join using caller's key column and other frame's index + Join DataFrames using their indexes. - >>> caller.join(other.set_index('key'), on='key', how='left', - ... lsuffix='_l', rsuffix='_r') + >>> caller.join(other, lsuffix='_caller', rsuffix='_other') + + A key_caller B key_other + 0 A0 K0 B0 K0 + 1 A1 K1 B1 K1 + 2 A2 K2 B2 K2 + 3 A3 K3 NaN NaN + 4 A4 K4 NaN NaN + 5 A5 K5 NaN NaN + + + If we want to join using the key columns, we need to set key to be + the index in both caller and other. The joined DataFrame will have + key as its index. + + >>> caller.set_index('key').join(other.set_index('key')) + + A B + key + K0 A0 B0 + K1 A1 B1 + K2 A2 B2 + K3 A3 NaN + K4 A4 NaN + K5 A5 NaN + + Another option to join using the key columns is to use the on + parameter. DataFrame.join always uses other's index but we can use any + column in the caller. This method preserves the original caller's + index in the result. 
+ + >>> caller.join(other.set_index('key'), on='key') A key B 0 A0 K0 B0 @@ -4381,6 +4411,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 4 A4 K4 NaN 5 A5 K5 NaN + See also -------- DataFrame.merge : For column(s)-on-column(s) operations