From 6c916a935cd7f91ebb3a3682a684acd50e3fad7b Mon Sep 17 00:00:00 2001 From: palewire Date: Sat, 31 Dec 2016 18:17:42 -0800 Subject: [PATCH 01/16] DOC: Added examples to pandas.concat documentation --- pandas/tools/merge.py | 54 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index efae7c63a9d0e..11461ddfa7467 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1398,9 +1398,11 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, copy=True): """ Concatenate pandas objects along a particular axis with optional set logic - along the other axes. Can also add a layer of hierarchical indexing on the - concatenation axis, which may be useful if the labels are the same (or - overlapping) on the passed axis number + along the other axes. + + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number Parameters ---------- @@ -1436,13 +1438,51 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, copy : boolean, default True If False, do not copy data unnecessarily - Notes - ----- - The keys, levels, and names arguments are all optional - Returns ------- concatenated : type of objects + + Notes + ----- + The keys, levels, and names arguments are all optional. + + Examples + -------- + Combine two ``Series``. + + >>> import pandas as pd + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + + Combine two ``DataFrame`` objects with identical columns. + + >>> df1 = pd.DataFrame( + ... [['a', 1], ['b', 2]], + ... columns=['letter', 'number'] + ... ) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame( + ... [['c', 3], ['d', 4]], + ... columns=['letter', 'number'] + ... 
) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2]) + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, ignore_index=ignore_index, join=join, From 67a750c2f2c539c709fd827cf0370d98299cfa37 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 08:24:44 -0800 Subject: [PATCH 02/16] Added ignore_index example --- pandas/tools/merge.py | 75 +++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 11461ddfa7467..422214aafceb8 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -705,7 +705,7 @@ def _get_join_info(self): _left_join_on_index(left_ax, right_ax, self.left_join_keys, sort=self.sort) - elif self.left_index and self.how == 'right': + elif self.tleft_index and self.how == 'right': join_index, right_indexer, left_indexer = \ _left_join_on_index(right_ax, left_ax, self.right_join_keys, sort=self.sort) @@ -1450,39 +1450,52 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, -------- Combine two ``Series``. - >>> import pandas as pd - >>> s1 = pd.Series(['a', 'b']) - >>> s2 = pd.Series(['c', 'd']) - >>> pd.concat([s1, s2]) - 0 a - 1 b - 0 c - 1 d + >>> import pandas as pd + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + dtype: object + + Ignore the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object Combine two ``DataFrame`` objects with identical columns. - >>> df1 = pd.DataFrame( - ... [['a', 1], ['b', 2]], - ... columns=['letter', 'number'] - ... ) - >>> df1 - letter number - 0 a 1 - 1 b 2 - >>> df2 = pd.DataFrame( - ... [['c', 3], ['d', 4]], - ... 
columns=['letter', 'number'] - ... ) - >>> df2 - letter number - 0 c 3 - 1 d 4 - >>> pd.concat([df1, df2]) - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 + >>> df1 = pd.DataFrame( + ... [['a', 1], ['b', 2]], + ... columns=['letter', 'number'] + ... ) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame( + ... [['c', 3], ['d', 4]], + ... columns=['letter', 'number'] + ... ) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2]) + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, ignore_index=ignore_index, join=join, From 2e8599081627e5ca65192c539374b301450acfff Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 08:25:12 -0800 Subject: [PATCH 03/16] Removed pandas import from example --- pandas/tools/merge.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 422214aafceb8..a270962e48032 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1450,7 +1450,6 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, -------- Combine two ``Series``. - >>> import pandas as pd >>> s1 = pd.Series(['a', 'b']) >>> s2 = pd.Series(['c', 'd']) >>> pd.concat([s1, s2]) From 709ec2723bcc3bcf96a79812d096f99d187cdca3 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 08:30:39 -0800 Subject: [PATCH 04/16] Wording change --- pandas/tools/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index a270962e48032..fc5c9eb875c56 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1459,7 +1459,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 1 d dtype: object - Ignore the existing index and reset it in the result + Clear the existing index and reset it in the result by setting the ``ignore_index`` option to ``True``. 
>>> s1 = pd.Series(['a', 'b']) From afa750252ea9d46f8e318d8d58dff6547af7b575 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 11:50:45 -0800 Subject: [PATCH 05/16] Added keys example --- pandas/tools/merge.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index fc5c9eb875c56..cfd1394ee145b 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1471,6 +1471,33 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 3 d dtype: object + Add a ``hierarchical index`` at the outermost level of + the data. + + >>> s1 = pd.Series(['a', 'b', 'c']) + >>> s2 = pd.Series(['c', 'd', 'e']) + >>> c = pd.concat([s1, s2], keys=["s1", 's2',]) + >>> c + s1 0 a + 1 b + 2 c + s2 0 c + 1 d + 2 e + dtype: object + >>> c.ix['s1'] + 0 a + 1 b + 2 c + dtype: object + >>> c.ix['s2'] + 0 c + 1 d + 2 e + dtype: object + >>> c.ix['s1'].ix[0] + 'a' + Combine two ``DataFrame`` objects with identical columns. >>> df1 = pd.DataFrame( From 3887774e1b5e147ff4ff85b5c5ade714603d6303 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 12:14:22 -0800 Subject: [PATCH 06/16] Added names options --- pandas/tools/merge.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index cfd1394ee145b..990081a722427 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1498,6 +1498,27 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, >>> c.ix['s1'].ix[0] 'a' + Label the index keys with the ``names`` option. 
+ + >>> s1 = pd.Series(['a', 'b', 'c']) + >>> s2 = pd.Series(['c', 'd', 'e']) + >>> c = pd.concat([s1, s2], keys=["s1", 's2',], names=["Series name", "Row ID"]) + >>> c + Series name Row ID + s1 0 a + 1 b + 2 c + s2 0 c + 1 d + 2 e + dtype: object + >>> c.loc('Series name')['s1'] + Row ID + 0 a + 1 b + 2 c + dtype: object + Combine two ``DataFrame`` objects with identical columns. >>> df1 = pd.DataFrame( From 415f082377a4aeb53d5f994ad1f332557c2a3203 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 12:19:38 -0800 Subject: [PATCH 07/16] Make some fixes for @jorisvandenbossche --- pandas/tools/merge.py | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 990081a722427..f813591624bd4 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1471,13 +1471,12 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 3 d dtype: object - Add a ``hierarchical index`` at the outermost level of + Add a hierarchical index at the outermost level of the data. >>> s1 = pd.Series(['a', 'b', 'c']) >>> s2 = pd.Series(['c', 'd', 'e']) - >>> c = pd.concat([s1, s2], keys=["s1", 's2',]) - >>> c + >>> pd.concat([s1, s2], keys=['s1', 's2',]) s1 0 a 1 b 2 c @@ -1485,25 +1484,16 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 1 d 2 e dtype: object - >>> c.ix['s1'] - 0 a - 1 b - 2 c - dtype: object - >>> c.ix['s2'] - 0 c - 1 d - 2 e - dtype: object - >>> c.ix['s1'].ix[0] - 'a' Label the index keys with the ``names`` option. >>> s1 = pd.Series(['a', 'b', 'c']) >>> s2 = pd.Series(['c', 'd', 'e']) - >>> c = pd.concat([s1, s2], keys=["s1", 's2',], names=["Series name", "Row ID"]) - >>> c + >>> pd.concat( + ... [s1, s2], + ... keys=['s1', 's2',], + ... names=['Series name', 'Row ID'] + ... 
) Series name Row ID s1 0 a 1 b @@ -1512,12 +1502,6 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 1 d 2 e dtype: object - >>> c.loc('Series name')['s1'] - Row ID - 0 a - 1 b - 2 c - dtype: object Combine two ``DataFrame`` objects with identical columns. From 40ccf8b39bc83bfdd87807853f88d40bbbbf2d3b Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 12:20:59 -0800 Subject: [PATCH 08/16] Removed typo --- pandas/tools/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index f813591624bd4..4babbbf402aae 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -705,7 +705,7 @@ def _get_join_info(self): _left_join_on_index(left_ax, right_ax, self.left_join_keys, sort=self.sort) - elif self.tleft_index and self.how == 'right': + elif self.left_index and self.how == 'right': join_index, right_indexer, left_indexer = \ _left_join_on_index(right_ax, left_ax, self.right_join_keys, sort=self.sort) From 2d83cd6f03d67f53cc1245a11fdbb7f372eaedb5 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 12:42:28 -0800 Subject: [PATCH 09/16] Added a simple axis=1 example --- pandas/tools/merge.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 4babbbf402aae..b704bcbce29c2 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1527,6 +1527,29 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 1 b 2 0 c 3 1 d 4 + + Combine ``DataFrame`` objects horizonally along the x axis. + + >>> df1 = pd.DataFrame( + ... [['a', 1], ['b', 2]], + ... columns=['letter', 'number'] + ... ) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame( + ... [['c', 3], ['d', 4]], + ... columns=['letter', 'number'] + ... 
) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2], axis=1) + letter number letter number + 0 a 1 b 3 + 1 b 2 c 4 """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, ignore_index=ignore_index, join=join, From 76a9814633130f770f2d0f076973342e6981d540 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 13:56:13 -0800 Subject: [PATCH 10/16] An example with a gap is probably better --- pandas/tools/merge.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index b704bcbce29c2..e4cd3204c68a5 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1528,28 +1528,22 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 0 c 3 1 d 4 - Combine ``DataFrame`` objects horizonally along the x axis. + Combine ``DataFrame`` objects horizonally along the x axis using the index. + >>> import pandas as pd >>> df1 = pd.DataFrame( ... [['a', 1], ['b', 2]], ... columns=['letter', 'number'] ... ) - >>> df1 - letter number - 0 a 1 - 1 b 2 + >>> >>> df2 = pd.DataFrame( - ... [['c', 3], ['d', 4]], + ... [['b', 3],], ... columns=['letter', 'number'] ... 
) - >>> df2 - letter number - 0 c 3 - 1 d 4 >>> pd.concat([df1, df2], axis=1) letter number letter number - 0 a 1 b 3 - 1 b 2 c 4 + 0 a 1 b 3.0 + 1 b 2 NaN NaN """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, ignore_index=ignore_index, join=join, From 16b8d28e51a906179426a00919b9f315524b491c Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 14:02:43 -0800 Subject: [PATCH 11/16] Added join examples --- pandas/tools/merge.py | 53 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index e4cd3204c68a5..a8b878b810d1f 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1528,6 +1528,59 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 0 c 3 1 d 4 + Combine ``DataFrame`` objects with overlapping columns + and return everything. + + >>> df1 = pd.DataFrame( + ... [['a', 1], ['b', 2]], + ... columns=['letter', 'number'] + ... ) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame( + ... [['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal'] + ... ) + >>> df2 + letter number animal + 0 c 3 cat + 1 d 4 dog + >>> pd.concat([df1, df2]) + animal letter number + 0 NaN a 1 + 1 NaN b 2 + 0 cat c 3 + 1 dog d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return only those that are shared pass ``inner`` to + the ``join`` keyword argument. + + >>> df1 = pd.DataFrame( + ... [['a', 1], ['b', 2]], + ... columns=['letter', 'number'] + ... ) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame( + ... [['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal'] + ... ) + >>> df2 + letter number animal + 0 c 3 cat + 1 d 4 dog + >>> pd.concat([df1, df2], join="inner") + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + Combine ``DataFrame`` objects horizonally along the x axis using the index. 
>>> import pandas as pd From f0052809c84004fbad7bd219817e8b388ecfbdf8 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 2 Jan 2017 18:51:13 -0800 Subject: [PATCH 12/16] Added link to walkthrough, some see also links as well as an verify index example --- pandas/tools/merge.py | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index a8b878b810d1f..79f8e8d4c8cea 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1446,6 +1446,17 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, ----- The keys, levels, and names arguments are all optional. + A walkthrough of how this method fits in with other tools for combining + panda objects can be found `here `_. + + See Also + -------- + Series.append + DataFrame.append + DataFrame.join + DataFrame.merge + Panel.join + Examples -------- Combine two ``Series``. @@ -1472,7 +1483,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, dtype: object Add a hierarchical index at the outermost level of - the data. + the data with the ``keys`` option. >>> s1 = pd.Series(['a', 'b', 'c']) >>> s2 = pd.Series(['c', 'd', 'e']) @@ -1485,7 +1496,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 2 e dtype: object - Label the index keys with the ``names`` option. + Label the index keys you create with the ``names`` option. >>> s1 = pd.Series(['a', 'b', 'c']) >>> s2 = pd.Series(['c', 'd', 'e']) @@ -1529,7 +1540,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 1 d 4 Combine ``DataFrame`` objects with overlapping columns - and return everything. + and return everything. Columns outside the intersection will + be filled with ``NaN`` values. >>> df1 = pd.DataFrame( ... 
[['a', 1], ['b', 2]], @@ -1555,7 +1567,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 1 dog d 4 Combine ``DataFrame`` objects with overlapping columns - and return only those that are shared pass ``inner`` to + and return only those that are shared by passing ``inner`` to the ``join`` keyword argument. >>> df1 = pd.DataFrame( @@ -1581,9 +1593,9 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 0 c 3 1 d 4 - Combine ``DataFrame`` objects horizonally along the x axis using the index. + Combine ``DataFrame`` objects horizonally along the x axis by + passing in ``axis=1``. - >>> import pandas as pd >>> df1 = pd.DataFrame( ... [['a', 1], ['b', 2]], ... columns=['letter', 'number'] @@ -1597,6 +1609,20 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, letter number letter number 0 a 1 b 3.0 1 b 2 NaN NaN + + Prevent the result from including duplicate index values with the + ``verify_integrity`` option. + + >>> df1 = pd.DataFrame([1], index=['a']) + >>> df1 + 0 + a 1 + >>> df2 = pd.DataFrame([2], index=['a']) + >>> df2 + 0 + a 2 + >>> pd.concat([df1, df2], verify_integrity=True) + ValueError: Indexes have overlapping values: ['a'] """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, ignore_index=ignore_index, join=join, From 7c5126ffb17c746a6cdd1987035c71661513bd7d Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 3 Jan 2017 08:57:06 -0800 Subject: [PATCH 13/16] Cleaned up axis example --- pandas/tools/merge.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 79f8e8d4c8cea..38bc56e878c07 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1600,15 +1600,22 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, ... [['a', 1], ['b', 2]], ... columns=['letter', 'number'] ... ) - >>> + >>> df1 + letter number + 0 a 1 + 1 b 2 >>> df2 = pd.DataFrame( - ... 
[['b', 3],], + ... [['c', 3], ['d', 4]], ... columns=['letter', 'number'] ... ) + >>> df2 + letter number + 0 c 3 + 1 d 4 >>> pd.concat([df1, df2], axis=1) letter number letter number - 0 a 1 b 3.0 - 1 b 2 NaN NaN + 0 a 1 c 3 + 1 b 2 d 4 Prevent the result from including duplicate index values with the ``verify_integrity`` option. From 5141b46f5f86bd71597c1fe3a28f0d1e391f57bf Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 3 Jan 2017 15:27:35 -0800 Subject: [PATCH 14/16] Changes to concat docs --- pandas/tools/merge.py | 99 ++++++++++--------------------------------- 1 file changed, 22 insertions(+), 77 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 38bc56e878c07..0c455d901c6d3 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1402,7 +1402,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, Can also add a layer of hierarchical indexing on the concatenation axis, which may be useful if the labels are the same (or overlapping) on - the passed axis number + the passed axis number. Parameters ---------- @@ -1473,8 +1473,6 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, Clear the existing index and reset it in the result by setting the ``ignore_index`` option to ``True``. - >>> s1 = pd.Series(['a', 'b']) - >>> s2 = pd.Series(['c', 'd']) >>> pd.concat([s1, s2], ignore_index=True) 0 a 1 b @@ -1485,49 +1483,34 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, Add a hierarchical index at the outermost level of the data with the ``keys`` option. - >>> s1 = pd.Series(['a', 'b', 'c']) - >>> s2 = pd.Series(['c', 'd', 'e']) >>> pd.concat([s1, s2], keys=['s1', 's2',]) s1 0 a 1 b - 2 c s2 0 c 1 d - 2 e dtype: object Label the index keys you create with the ``names`` option. - >>> s1 = pd.Series(['a', 'b', 'c']) - >>> s2 = pd.Series(['c', 'd', 'e']) - >>> pd.concat( - ... [s1, s2], - ... keys=['s1', 's2',], - ... 
names=['Series name', 'Row ID'] - ... ) + >>> pd.concat([s1, s2], keys=['s1', 's2'], + ... names=['Series name', 'Row ID']) Series name Row ID s1 0 a 1 b - 2 c s2 0 c 1 d - 2 e dtype: object Combine two ``DataFrame`` objects with identical columns. - >>> df1 = pd.DataFrame( - ... [['a', 1], ['b', 2]], - ... columns=['letter', 'number'] - ... ) + >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], + ... columns=['letter', 'number']) >>> df1 letter number 0 a 1 1 b 2 - >>> df2 = pd.DataFrame( - ... [['c', 3], ['d', 4]], - ... columns=['letter', 'number'] - ... ) + >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], + ... columns=['letter', 'number']) >>> df2 letter number 0 c 3 @@ -1543,23 +1526,15 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, and return everything. Columns outside the intersection will be filled with ``NaN`` values. - >>> df1 = pd.DataFrame( - ... [['a', 1], ['b', 2]], - ... columns=['letter', 'number'] - ... ) - >>> df1 - letter number - 0 a 1 - 1 b 2 - >>> df2 = pd.DataFrame( + >>> df3 = pd.DataFrame( ... [['c', 3, 'cat'], ['d', 4, 'dog']], ... columns=['letter', 'number', 'animal'] ... ) - >>> df2 + >>> df3 letter number animal 0 c 3 cat 1 d 4 dog - >>> pd.concat([df1, df2]) + >>> pd.concat([df1, df3]) animal letter number 0 NaN a 1 1 NaN b 2 @@ -1570,23 +1545,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, and return only those that are shared by passing ``inner`` to the ``join`` keyword argument. - >>> df1 = pd.DataFrame( - ... [['a', 1], ['b', 2]], - ... columns=['letter', 'number'] - ... ) - >>> df1 - letter number - 0 a 1 - 1 b 2 - >>> df2 = pd.DataFrame( - ... [['c', 3, 'cat'], ['d', 4, 'dog']], - ... columns=['letter', 'number', 'animal'] - ... 
) - >>> df2 - letter number animal - 0 c 3 cat - 1 d 4 dog - >>> pd.concat([df1, df2], join="inner") + >>> pd.concat([df1, df3], join="inner") letter number 0 a 1 1 b 2 @@ -1596,39 +1555,25 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, Combine ``DataFrame`` objects horizonally along the x axis by passing in ``axis=1``. - >>> df1 = pd.DataFrame( - ... [['a', 1], ['b', 2]], - ... columns=['letter', 'number'] - ... ) - >>> df1 - letter number - 0 a 1 - 1 b 2 - >>> df2 = pd.DataFrame( - ... [['c', 3], ['d', 4]], - ... columns=['letter', 'number'] - ... ) - >>> df2 - letter number - 0 c 3 - 1 d 4 - >>> pd.concat([df1, df2], axis=1) - letter number letter number - 0 a 1 c 3 - 1 b 2 d 4 + >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], + ... columns=['animal', 'name']) + >>> pd.concat([df1, df4], axis=1) + letter number animal name + 0 a 1 bird polly + 1 b 2 monkey george Prevent the result from including duplicate index values with the ``verify_integrity`` option. - >>> df1 = pd.DataFrame([1], index=['a']) - >>> df1 + >>> df5 = pd.DataFrame([1], index=['a']) + >>> df5 0 a 1 - >>> df2 = pd.DataFrame([2], index=['a']) - >>> df2 + >>> df6 = pd.DataFrame([2], index=['a']) + >>> df6 0 a 2 - >>> pd.concat([df1, df2], verify_integrity=True) + >>> pd.concat([df5, df6], verify_integrity=True) ValueError: Indexes have overlapping values: ['a'] """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, From c58f9f8c40ed525f05de2cb124ab7e5f1dbe7b9d Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 3 Jan 2017 15:30:34 -0800 Subject: [PATCH 15/16] Corrected indentation --- pandas/tools/merge.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 0c455d901c6d3..1a9d1cd5d5ed1 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1526,10 +1526,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, and return everything. 
Columns outside the intersection will be filled with ``NaN`` values. - >>> df3 = pd.DataFrame( - ... [['c', 3, 'cat'], ['d', 4, 'dog']], - ... columns=['letter', 'number', 'animal'] - ... ) + >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal']) >>> df3 letter number animal 0 c 3 cat From 780d2066a26c32b2ffd10f805c01f862a93226d3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 4 Jan 2017 14:49:53 +0100 Subject: [PATCH 16/16] correct link to tutorial docs --- pandas/tools/merge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 1a9d1cd5d5ed1..4012629aa3c90 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1447,7 +1447,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, The keys, levels, and names arguments are all optional. A walkthrough of how this method fits in with other tools for combining - panda objects can be found `here `_. + pandas objects can be found `here + <http://pandas.pydata.org/pandas-docs/stable/merging.html>`__. See Also -------- @@ -1455,7 +1456,6 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, DataFrame.append DataFrame.join DataFrame.merge - Panel.join Examples -------- @@ -1550,7 +1550,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 0 c 3 1 d 4 - Combine ``DataFrame`` objects horizonally along the x axis by + Combine ``DataFrame`` objects horizontally along the x axis by passing in ``axis=1``. >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],