From a73bd9a65022b7fddf5918fa8b939ac4492466ab Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 20:29:18 +0000 Subject: [PATCH 01/25] Fix docstring of pandas.DataFrame.stack. - Make description and summary clearer. - Fix doctests --- pandas/core/frame.py | 47 +++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efb002474f876..3f2e7189ce74f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5145,36 +5145,51 @@ def pivot_table(self, values=None, index=None, columns=None, def stack(self, level=-1, dropna=True): """ - Pivot a level of the (possibly hierarchical) column labels, returning a - DataFrame (or Series in the case of an object with a single level of - column labels) having a hierarchical index with a new inner-most level - of row labels. - The level involved will automatically get sorted. + Stack the prescribed level(s) from the column axis onto the index + axis. + + Return a reshaped DataFrame or Series having a multi-level + index with one or more new inner-most levels compared to the current + dataframe. The new inner-most levels are created by pivoting the + columns of the current dataframe: + + - if the columns have a single level, the output is a Series; + - if the columns have multiple levels, the new index level + is taken from the prescribed level(s) and the output is a + DataFrame. + + The new index levels are sorted. Parameters ---------- - level : int, string, or list of these, default last level - Level(s) to stack, can pass level name + level : int, string, list, default last level + Level(s) to stack from the column axis, defined as + integers or strings. dropna : boolean, default True - Whether to drop rows in the resulting Frame/Series with no valid - values + Whether to drop rows in the resulting Frame/Series with no + valid values. 
Examples ---------- + >>> s = pd.DataFrame([[0, 1], [2, 3]], index=['one', 'two'], columns=['a', 'b']) >>> s a b - one 1. 2. - two 3. 4. - + one 0 1 + two 2 3 >>> s.stack() - one a 1 - b 2 - two a 3 - b 4 + one a 0 + b 1 + two a 2 + b 3 + dtype: int64 Returns ------- stacked : DataFrame or Series + + See Also + -------- + pandas.DataFrame.unstack: unstack prescribed level(s) from index axis onto column axis. """ from pandas.core.reshape.reshape import stack, stack_multiple From 177143738476770f0e748f1f7b6710b11a77197b Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 20:39:41 +0000 Subject: [PATCH 02/25] Polish the docstring (plural issues and the like). --- pandas/core/frame.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3f2e7189ce74f..86f2929297c6e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5154,17 +5154,18 @@ def stack(self, level=-1, dropna=True): columns of the current dataframe: - if the columns have a single level, the output is a Series; - - if the columns have multiple levels, the new index level - is taken from the prescribed level(s) and the output is a - DataFrame. + - if the columns have multiple levels, the new index + level(s) is (are) taken from the prescribed level(s) and + the output is a DataFrame. The new index levels are sorted. Parameters ---------- level : int, string, list, default last level - Level(s) to stack from the column axis, defined as - integers or strings. + Level(s) to stack from the column axis onto the index + axis, defined as one index or label, or a list of indices + or labels. dropna : boolean, default True Whether to drop rows in the resulting Frame/Series with no valid values. From f17b52b75b47d6bc66428dcc270592fb86e8b6a3 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 20:40:10 +0000 Subject: [PATCH 03/25] Add description to example. 
--- pandas/core/frame.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 86f2929297c6e..e5a841cc09c25 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5172,6 +5172,9 @@ def stack(self, level=-1, dropna=True): Examples ---------- + + Stacking a simple dataframe with a single level column axis + >>> s = pd.DataFrame([[0, 1], [2, 3]], index=['one', 'two'], columns=['a', 'b']) >>> s a b From c7561411dec6a8cba118075a17f5d6d7642a0667 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 20:45:28 +0000 Subject: [PATCH 04/25] Add an example with multi-level column. --- pandas/core/frame.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e5a841cc09c25..f2f2fb6fc2419 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5187,6 +5187,22 @@ def stack(self, level=-1, dropna=True): b 3 dtype: int64 + Stacking a simple dataframe with a multi-level column axis + + >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) + >>> s = pd.DataFrame([[0, 1], [2, 3]], index=['one', 'two'], columns=multicol) + >>> s + X + a b + one 0 1 + two 2 3 + >>> s.stack() + X + one a 0 + b 1 + two a 2 + b 3 + Returns ------- stacked : DataFrame or Series From 4d09b85d6e39add2502d913a6e6462366bf67c7e Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 21:09:17 +0000 Subject: [PATCH 05/25] Add more examples. --- pandas/core/frame.py | 56 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f2f2fb6fc2419..c122f1e8ec03f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5167,13 +5167,16 @@ def stack(self, level=-1, dropna=True): axis, defined as one index or label, or a list of indices or labels. dropna : boolean, default True - Whether to drop rows in the resulting Frame/Series with no - valid values. 
+ Whether to drop rows in the resulting Frame/Series with + missing values. Stacking a column level onto the index + axis can create combinations of index and column values + that are missing from the original dataframe. See Examples + section. Examples - ---------- + -------- - Stacking a simple dataframe with a single level column axis + Stacking a dataframe with a single level column axis: >>> s = pd.DataFrame([[0, 1], [2, 3]], index=['one', 'two'], columns=['a', 'b']) >>> s @@ -5187,7 +5190,7 @@ def stack(self, level=-1, dropna=True): b 3 dtype: int64 - Stacking a simple dataframe with a multi-level column axis + Stacking a dataframe with a multi-level column axis with no missing values: >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) >>> s = pd.DataFrame([[0, 1], [2, 3]], index=['one', 'two'], columns=multicol) @@ -5203,6 +5206,47 @@ def stack(self, level=-1, dropna=True): two a 2 b 3 + Stacking a dataframe with a multi-level column axis with no missing values + + >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) + >>> s = pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], index=['one', 'two'], columns=multicol) + >>> s + X Y + a b + one 0.0 1.0 + two 2.0 3.0 + + By default the missing values are filled with NaNs: + >>> s.stack() + X Y + one a 0.0 NaN + b NaN 1.0 + two a 2.0 NaN + b NaN 3.0 + + Rows where all values are missing are dropped by default: + + >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) + >>> s = pd.DataFrame([[None, 1.0], [2.0, 3.0]], index=['one', 'two'], columns=multicol) + >>> s + X Y + a b + one NaN 1.0 + two 2.0 3.0 + + >>> s.stack(dropna=False) + X Y + one a NaN NaN + b NaN 1.0 + two a 2.0 NaN + b NaN 3.0 + + >>> s.stack(dropna=True) + X Y + one b NaN 1.0 + two a 2.0 NaN + b NaN 3.0 + Returns ------- stacked : DataFrame or Series @@ -5210,6 +5254,8 @@ def stack(self, level=-1, dropna=True): See Also -------- pandas.DataFrame.unstack: unstack prescribed level(s) from index axis onto column axis. 
+ pandas.DataFrame.pivot: reshape dataframe from long format to wide format. + pandas.DataFrame.pivot_table: create a spreadsheet-style pivot table as a DataFrame. """ from pandas.core.reshape.reshape import stack, stack_multiple From d5a262a24253d176311547f9445a004c1eac1544 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 21:15:25 +0000 Subject: [PATCH 06/25] Fix sphinx docs --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c122f1e8ec03f..883e538bc9bbf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5217,6 +5217,7 @@ def stack(self, level=-1, dropna=True): two 2.0 3.0 By default the missing values are filled with NaNs: + >>> s.stack() X Y one a 0.0 NaN From d3ef09426a66317571219777d02054666314af07 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 21:16:50 +0000 Subject: [PATCH 07/25] Fix parameter types --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 883e538bc9bbf..5834fa8ea1232 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5162,11 +5162,11 @@ def stack(self, level=-1, dropna=True): Parameters ---------- - level : int, string, list, default last level + level : int, str, list, default -1 Level(s) to stack from the column axis onto the index axis, defined as one index or label, or a list of indices or labels. - dropna : boolean, default True + dropna : bool, default True Whether to drop rows in the resulting Frame/Series with missing values. Stacking a column level onto the index axis can create combinations of index and column values From 4d60246abc1809403a9794b0ee7a73f3bfc7accc Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 21:24:06 +0000 Subject: [PATCH 08/25] Post review improvements. Reviewed by Marco. 
--- pandas/core/frame.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5834fa8ea1232..da3d4502bdd43 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5250,13 +5250,14 @@ def stack(self, level=-1, dropna=True): Returns ------- - stacked : DataFrame or Series + DataFrame or Series + Stacked dataframe or series. See Also -------- - pandas.DataFrame.unstack: unstack prescribed level(s) from index axis onto column axis. - pandas.DataFrame.pivot: reshape dataframe from long format to wide format. - pandas.DataFrame.pivot_table: create a spreadsheet-style pivot table as a DataFrame. + DataFrame.unstack: unstack prescribed level(s) from index axis onto column axis. + DataFrame.pivot: reshape dataframe from long format to wide format. + DataFrame.pivot_table: create a spreadsheet-style pivot table as a DataFrame. """ from pandas.core.reshape.reshape import stack, stack_multiple From 16301d6c881f3888b78082b2775a76d3f291a8cc Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 21:34:07 +0000 Subject: [PATCH 09/25] Start refactoring the examples. Separate creation of data with examples. --- pandas/core/frame.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index da3d4502bdd43..453d88ec4de69 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5175,15 +5175,21 @@ def stack(self, level=-1, dropna=True): Examples -------- + >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], + ... index=['one', 'two'], + ... columns=['a', 'b']) + >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) + >>> df_multi_level_cols = pd.DataFrame([[0, 1], [2, 3]], + ... index=['one', 'two'], + ... 
columns=multicol) - Stacking a dataframe with a single level column axis: + Stacking a dataframe with a single level column axis returns a Series: - >>> s = pd.DataFrame([[0, 1], [2, 3]], index=['one', 'two'], columns=['a', 'b']) - >>> s + >>> df_single_level_cols a b one 0 1 two 2 3 - >>> s.stack() + >>> df_single_level_cols.stack() one a 0 b 1 two a 2 @@ -5192,14 +5198,12 @@ def stack(self, level=-1, dropna=True): Stacking a dataframe with a multi-level column axis with no missing values: - >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) - >>> s = pd.DataFrame([[0, 1], [2, 3]], index=['one', 'two'], columns=multicol) - >>> s + >>> df_multi_level_cols X a b one 0 1 two 2 3 - >>> s.stack() + >>> df_multi_level_cols.stack() X one a 0 b 1 From 310511dd5c01b0861dcf7f9dc06de355729ffa4f Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 21:39:37 +0000 Subject: [PATCH 10/25] Refactor examples --- pandas/core/frame.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 453d88ec4de69..95dbfc3cbe63e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5178,10 +5178,17 @@ def stack(self, level=-1, dropna=True): >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], ... index=['one', 'two'], ... columns=['a', 'b']) - >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) - >>> df_multi_level_cols = pd.DataFrame([[0, 1], [2, 3]], + >>> multicol1 = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) + >>> df_multi_level_cols1 = pd.DataFrame([[0, 1], [2, 3]], ... index=['one', 'two'], - ... columns=multicol) + ... columns=multicol1) + >>> multicol2 = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) + >>> df_multi_level_cols2 = pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], + ... index=['one', 'two'], + ... columns=multicol2) + >>> df_multi_level_cols3 = pd.DataFrame([[None, 1.0], [2.0, 3.0]], + ... index=['one', 'two'], + ... 
columns=multicol2) Stacking a dataframe with a single level column axis returns a Series: @@ -5198,31 +5205,27 @@ def stack(self, level=-1, dropna=True): Stacking a dataframe with a multi-level column axis with no missing values: - >>> df_multi_level_cols + >>> df_multi_level_cols1 X a b one 0 1 two 2 3 - >>> df_multi_level_cols.stack() + >>> df_multi_level_cols1.stack() X one a 0 b 1 two a 2 b 3 - Stacking a dataframe with a multi-level column axis with no missing values + Stacking a dataframe with a multi-level column axis with no missing values. + By default the missing values are filled with NaNs: - >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) - >>> s = pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], index=['one', 'two'], columns=multicol) - >>> s + >>> df_multi_level_cols2 X Y a b one 0.0 1.0 two 2.0 3.0 - - By default the missing values are filled with NaNs: - - >>> s.stack() + >>> df_multi_level_cols2.stack() X Y one a 0.0 NaN b NaN 1.0 @@ -5231,22 +5234,20 @@ def stack(self, level=-1, dropna=True): Rows where all values are missing are dropped by default: - >>> multicol = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) - >>> s = pd.DataFrame([[None, 1.0], [2.0, 3.0]], index=['one', 'two'], columns=multicol) - >>> s + >>> df_multi_level_cols3 X Y a b one NaN 1.0 two 2.0 3.0 - >>> s.stack(dropna=False) + >>> df_multi_level_cols3.stack(dropna=False) X Y one a NaN NaN b NaN 1.0 two a 2.0 NaN b NaN 3.0 - >>> s.stack(dropna=True) + >>> df_multi_level_cols3.stack(dropna=True) X Y one b NaN 1.0 two a 2.0 NaN From 7e102739afdb3f09d1744a2797d5e04e4743fc37 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 22:08:38 +0000 Subject: [PATCH 11/25] Polish examples. 
--- pandas/core/frame.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 95dbfc3cbe63e..9ee19dcdeb968 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5217,8 +5217,10 @@ def stack(self, level=-1, dropna=True): two a 2 b 3 - Stacking a dataframe with a multi-level column axis with no missing values. - By default the missing values are filled with NaNs: + It is common to have missing values when stacking a dataframe + with multi-level columns, since the stacked dataframe can have + more values than the original dataframe. By default the + missing values are filled with NaNs: >>> df_multi_level_cols2 X Y @@ -5232,14 +5234,15 @@ def stack(self, level=-1, dropna=True): two a 2.0 NaN b NaN 3.0 - Rows where all values are missing are dropped by default: + Rows where all values are missing are dropped by default but + this behaviour can be controlled via the dropna keyword + parameter: >>> df_multi_level_cols3 X Y a b one NaN 1.0 two 2.0 3.0 - >>> df_multi_level_cols3.stack(dropna=False) X Y one a NaN NaN From 77c9fac20e35358b21337811324cc99132001bc8 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 22:33:53 +0000 Subject: [PATCH 12/25] Add an example where multiple levels are stacked at once. --- pandas/core/frame.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9ee19dcdeb968..4fdc9d5da36d3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5173,6 +5173,12 @@ def stack(self, level=-1, dropna=True): that are missing from the original dataframe. See Examples section. + Notes + ----- + The function is named by analogy with a stack of books + (levels) being re-organised from a horizontal position (column + levels) to a vertical position (index levels). 
+ Examples -------- >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], @@ -5203,7 +5209,7 @@ def stack(self, level=-1, dropna=True): b 3 dtype: int64 - Stacking a dataframe with a multi-level column axis with no missing values: + Stacking a dataframe with a multi-level column axis: >>> df_multi_level_cols1 X @@ -5218,8 +5224,8 @@ def stack(self, level=-1, dropna=True): b 3 It is common to have missing values when stacking a dataframe - with multi-level columns, since the stacked dataframe can have - more values than the original dataframe. By default the + with multi-level columns, as the stacked dataframe typically + has more values than the original dataframe. By default the missing values are filled with NaNs: >>> df_multi_level_cols2 @@ -5234,9 +5240,18 @@ def stack(self, level=-1, dropna=True): two a 2.0 NaN b NaN 3.0 - Rows where all values are missing are dropped by default but - this behaviour can be controlled via the dropna keyword - parameter: + Multiple levels can be stacked at once + + >>> df_multi_level_cols2.stack([1, 0]) + one a X 0.0 + b Y 1.0 + two a X 2.0 + b Y 3.0 + dtype: float64 + + Note that rows where all values are missing are dropped by + default but this behaviour can be controlled via the dropna + keyword parameter: >>> df_multi_level_cols3 X Y From 98a4a93f62eb5d04b40732e2b303a4f16a49c96f Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 22:38:48 +0000 Subject: [PATCH 13/25] Clarify filling behaviour with missing values --- pandas/core/frame.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4fdc9d5da36d3..09902f23d5689 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5225,8 +5225,8 @@ def stack(self, level=-1, dropna=True): It is common to have missing values when stacking a dataframe with multi-level columns, as the stacked dataframe typically - has more values than the original dataframe. 
By default the - missing values are filled with NaNs: + has more values than the original dataframe. Missing values + are filled with NaNs: >>> df_multi_level_cols2 X Y @@ -5240,13 +5240,19 @@ def stack(self, level=-1, dropna=True): two a 2.0 NaN b NaN 3.0 - Multiple levels can be stacked at once - - >>> df_multi_level_cols2.stack([1, 0]) - one a X 0.0 - b Y 1.0 - two a X 2.0 - b Y 3.0 + The first parameter controls which level or levels are stacked: + + >>> df_multi_level_cols2.stack(0) + a b + one X 0.0 NaN + Y NaN 1.0 + two X 2.0 NaN + Y NaN 3.0 + >>> df_multi_level_cols2.stack([0, 1]) + one X a 0.0 + Y b 1.0 + two X a 2.0 + Y b 3.0 dtype: float64 Note that rows where all values are missing are dropped by From 41ad4cff06de2c4900dc4c998545a46ad2c37c68 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 20 Mar 2018 22:42:55 +0000 Subject: [PATCH 14/25] flake8 --- pandas/core/frame.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 09902f23d5689..2215dc63c9c53 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5284,9 +5284,12 @@ def stack(self, level=-1, dropna=True): See Also -------- - DataFrame.unstack: unstack prescribed level(s) from index axis onto column axis. - DataFrame.pivot: reshape dataframe from long format to wide format. - DataFrame.pivot_table: create a spreadsheet-style pivot table as a DataFrame. + DataFrame.unstack: unstack prescribed level(s) from index axis + onto column axis. + DataFrame.pivot: reshape dataframe from long format to wide + format. + DataFrame.pivot_table: create a spreadsheet-style pivot table + as a DataFrame. """ from pandas.core.reshape.reshape import stack, stack_multiple From 99734ac628ef64a7f51749c16245edb203bf29ec Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 21 Mar 2018 06:27:53 +0000 Subject: [PATCH 15/25] Put Examples section at the end. 
--- pandas/core/frame.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2215dc63c9c53..3a812ce0a10e8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5173,6 +5173,20 @@ def stack(self, level=-1, dropna=True): that are missing from the original dataframe. See Examples section. + Returns + ------- + DataFrame or Series + Stacked dataframe or series. + + See Also + -------- + DataFrame.unstack: unstack prescribed level(s) from index axis + onto column axis. + DataFrame.pivot: reshape dataframe from long format to wide + format. + DataFrame.pivot_table: create a spreadsheet-style pivot table + as a DataFrame. + Notes ----- The function is named by analogy with a stack of books @@ -5276,20 +5290,6 @@ def stack(self, level=-1, dropna=True): one b NaN 1.0 two a 2.0 NaN b NaN 3.0 - - Returns - ------- - DataFrame or Series - Stacked dataframe or series. - - See Also - -------- - DataFrame.unstack: unstack prescribed level(s) from index axis - onto column axis. - DataFrame.pivot: reshape dataframe from long format to wide - format. - DataFrame.pivot_table: create a spreadsheet-style pivot table - as a DataFrame. """ from pandas.core.reshape.reshape import stack, stack_multiple From 652f7b279e9da00640b1f653468140d075989d9c Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 21 Mar 2018 06:30:19 +0000 Subject: [PATCH 16/25] Fix 'See Also' section. --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3a812ce0a10e8..9e2c40571da46 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5180,11 +5180,11 @@ def stack(self, level=-1, dropna=True): See Also -------- - DataFrame.unstack: unstack prescribed level(s) from index axis + DataFrame.unstack : Unstack prescribed level(s) from index axis onto column axis. 
- DataFrame.pivot: reshape dataframe from long format to wide + DataFrame.pivot : Reshape dataframe from long format to wide format. - DataFrame.pivot_table: create a spreadsheet-style pivot table + DataFrame.pivot_table : Create a spreadsheet-style pivot table as a DataFrame. Notes From 7f422d6499530a9a08c0cf637d439a923a86bccd Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 21 Mar 2018 06:37:09 +0000 Subject: [PATCH 17/25] Create separate section for single level columns. More clear than lumping all the definitions into a single section at the start. --- pandas/core/frame.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9e2c40571da46..7501802409edd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5195,9 +5195,6 @@ def stack(self, level=-1, dropna=True): Examples -------- - >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], - ... index=['one', 'two'], - ... columns=['a', 'b']) >>> multicol1 = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) >>> df_multi_level_cols1 = pd.DataFrame([[0, 1], [2, 3]], ... index=['one', 'two'], @@ -5210,6 +5207,12 @@ def stack(self, level=-1, dropna=True): ... index=['one', 'two'], ... columns=multicol2) + **Single level columns** + + >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], + ... index=['one', 'two'], + ... columns=['a', 'b']) + Stacking a dataframe with a single level column axis returns a Series: >>> df_single_level_cols From 15902ed6bbbb307b268322177f424cd0351bb7de Mon Sep 17 00:00:00 2001 From: Samuel Date: Thu, 22 Mar 2018 19:33:30 +0000 Subject: [PATCH 18/25] Split the examples into several sections. Easier to follow. 
--- pandas/core/frame.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7501802409edd..df058a1790b35 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5195,17 +5195,6 @@ def stack(self, level=-1, dropna=True): Examples -------- - >>> multicol1 = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) - >>> df_multi_level_cols1 = pd.DataFrame([[0, 1], [2, 3]], - ... index=['one', 'two'], - ... columns=multicol1) - >>> multicol2 = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) - >>> df_multi_level_cols2 = pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], - ... index=['one', 'two'], - ... columns=multicol2) - >>> df_multi_level_cols3 = pd.DataFrame([[None, 1.0], [2.0, 3.0]], - ... index=['one', 'two'], - ... columns=multicol2) **Single level columns** @@ -5226,6 +5215,13 @@ def stack(self, level=-1, dropna=True): b 3 dtype: int64 + **Multi level columns: simple case** + + >>> multicol1 = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) + >>> df_multi_level_cols1 = pd.DataFrame([[0, 1], [2, 3]], + ... index=['one', 'two'], + ... columns=multicol1) + Stacking a dataframe with a multi-level column axis: >>> df_multi_level_cols1 @@ -5240,6 +5236,13 @@ def stack(self, level=-1, dropna=True): two a 2 b 3 + **Missing values** + + >>> multicol2 = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) + >>> df_multi_level_cols2 = pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], + ... index=['one', 'two'], + ... columns=multicol2) + It is common to have missing values when stacking a dataframe with multi-level columns, as the stacked dataframe typically has more values than the original dataframe. 
Missing values @@ -5257,6 +5260,8 @@ def stack(self, level=-1, dropna=True): two a 2.0 NaN b NaN 3.0 + **Prescribing the level(s) to be stacked** + The first parameter controls which level or levels are stacked: >>> df_multi_level_cols2.stack(0) @@ -5272,6 +5277,13 @@ def stack(self, level=-1, dropna=True): Y b 3.0 dtype: float64 + **Dropping missing values** + + >>> df_multi_level_cols3 = pd.DataFrame([[None, 1.0], [2.0, 3.0]], + ... index=['one', 'two'], + ... columns=multicol2) + + Note that rows where all values are missing are dropped by default but this behaviour can be controlled via the dropna keyword parameter: From 23798864ceae4469ab6ddcc3e9cda7f8429f81b7 Mon Sep 17 00:00:00 2001 From: Samuel Date: Thu, 22 Mar 2018 19:35:58 +0000 Subject: [PATCH 19/25] remove unwanted blank lines --- pandas/core/frame.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df058a1790b35..8edb872490d93 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5195,7 +5195,6 @@ def stack(self, level=-1, dropna=True): Examples -------- - **Single level columns** >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], @@ -5283,7 +5282,6 @@ def stack(self, level=-1, dropna=True): ... index=['one', 'two'], ... 
columns=multicol2) - Note that rows where all values are missing are dropped by default but this behaviour can be controlled via the dropna keyword parameter: @@ -5299,7 +5297,6 @@ def stack(self, level=-1, dropna=True): b NaN 1.0 two a 2.0 NaN b NaN 3.0 - >>> df_multi_level_cols3.stack(dropna=True) X Y one b NaN 1.0 From 2e0873b0968fa84160395b8dd75fbb9d9e1c995c Mon Sep 17 00:00:00 2001 From: Samuel Date: Thu, 22 Mar 2018 19:46:03 +0000 Subject: [PATCH 20/25] Start using more meaningful index & column names --- pandas/core/frame.py | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8edb872490d93..dcd5239eba4ff 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5198,48 +5198,48 @@ def stack(self, level=-1, dropna=True): **Single level columns** >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], - ... index=['one', 'two'], - ... columns=['a', 'b']) + ... index=['cat', 'dog'], + ... columns=['weight', 'height']) Stacking a dataframe with a single level column axis returns a Series: >>> df_single_level_cols - a b - one 0 1 - two 2 3 + weight height + cat 0 1 + dog 2 3 >>> df_single_level_cols.stack() - one a 0 - b 1 - two a 2 - b 3 + cat weight 0 + height 1 + dog weight 2 + height 3 dtype: int64 **Multi level columns: simple case** - >>> multicol1 = pd.MultiIndex.from_tuples([('X', 'a'), ('X', 'b')]) + >>> multicol1 = pd.MultiIndex.from_tuples([('size', 'weight'), ('size', 'height')]) >>> df_multi_level_cols1 = pd.DataFrame([[0, 1], [2, 3]], - ... index=['one', 'two'], + ... index=['cat', 'dog'], ... 
columns=multicol1) Stacking a dataframe with a multi-level column axis: >>> df_multi_level_cols1 - X - a b - one 0 1 - two 2 3 + size + weight height + cat 0 1 + dog 2 3 >>> df_multi_level_cols1.stack() - X - one a 0 - b 1 - two a 2 - b 3 + size + cat height 1 + weight 0 + dog height 3 + weight 2 **Missing values** >>> multicol2 = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) >>> df_multi_level_cols2 = pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], - ... index=['one', 'two'], + ... index=['cat', 'dog'], ... columns=multicol2) It is common to have missing values when stacking a dataframe @@ -5250,13 +5250,13 @@ def stack(self, level=-1, dropna=True): >>> df_multi_level_cols2 X Y a b - one 0.0 1.0 - two 2.0 3.0 + cat 0.0 1.0 + dog 2.0 3.0 >>> df_multi_level_cols2.stack() X Y - one a 0.0 NaN + cat a 0.0 NaN b NaN 1.0 - two a 2.0 NaN + dog a 2.0 NaN b NaN 3.0 **Prescribing the level(s) to be stacked** @@ -5265,21 +5265,21 @@ def stack(self, level=-1, dropna=True): >>> df_multi_level_cols2.stack(0) a b - one X 0.0 NaN + cat X 0.0 NaN Y NaN 1.0 - two X 2.0 NaN + dog X 2.0 NaN Y NaN 3.0 >>> df_multi_level_cols2.stack([0, 1]) - one X a 0.0 + cat X a 0.0 Y b 1.0 - two X a 2.0 + dog X a 2.0 Y b 3.0 dtype: float64 **Dropping missing values** >>> df_multi_level_cols3 = pd.DataFrame([[None, 1.0], [2.0, 3.0]], - ... index=['one', 'two'], + ... index=['cat', 'dog'], ... 
columns=multicol2) Note that rows where all values are missing are dropped by @@ -5289,18 +5289,18 @@ def stack(self, level=-1, dropna=True): >>> df_multi_level_cols3 X Y a b - one NaN 1.0 - two 2.0 3.0 + cat NaN 1.0 + dog 2.0 3.0 >>> df_multi_level_cols3.stack(dropna=False) X Y - one a NaN NaN + cat a NaN NaN b NaN 1.0 - two a 2.0 NaN + dog a 2.0 NaN b NaN 3.0 >>> df_multi_level_cols3.stack(dropna=True) X Y - one b NaN 1.0 - two a 2.0 NaN + cat b NaN 1.0 + dog a 2.0 NaN b NaN 3.0 """ from pandas.core.reshape.reshape import stack, stack_multiple From 718f212f319368202e1fe3c3f6117dde27d7b435 Mon Sep 17 00:00:00 2001 From: Samuel Date: Thu, 22 Mar 2018 20:05:08 +0000 Subject: [PATCH 21/25] Use more meaningful column and index names. --- pandas/core/frame.py | 88 ++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dcd5239eba4ff..29bedfb61c633 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5216,29 +5216,29 @@ def stack(self, level=-1, dropna=True): **Multi level columns: simple case** - >>> multicol1 = pd.MultiIndex.from_tuples([('size', 'weight'), ('size', 'height')]) - >>> df_multi_level_cols1 = pd.DataFrame([[0, 1], [2, 3]], + >>> multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'), ('weight', 'pounds')]) + >>> df_multi_level_cols1 = pd.DataFrame([[1, 2], [2, 4]], ... index=['cat', 'dog'], ... 
columns=multicol1) Stacking a dataframe with a multi-level column axis: >>> df_multi_level_cols1 - size - weight height - cat 0 1 - dog 2 3 + weight + kg pounds + cat 1 2 + dog 2 4 >>> df_multi_level_cols1.stack() - size - cat height 1 - weight 0 - dog height 3 - weight 2 + weight + cat kg 1 + pounds 2 + dog kg 2 + pounds 4 **Missing values** - >>> multicol2 = pd.MultiIndex.from_tuples([('X', 'a'), ('Y', 'b')]) - >>> df_multi_level_cols2 = pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], + >>> multicol2 = pd.MultiIndex.from_tuples([('weight', 'kg'), ('height', 'm')]) + >>> df_multi_level_cols2 = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], ... index=['cat', 'dog'], ... columns=multicol2) @@ -5248,32 +5248,32 @@ def stack(self, level=-1, dropna=True): are filled with NaNs: >>> df_multi_level_cols2 - X Y - a b - cat 0.0 1.0 - dog 2.0 3.0 + weight height + kg m + cat 1.0 2.0 + dog 3.0 4.0 >>> df_multi_level_cols2.stack() - X Y - cat a 0.0 NaN - b NaN 1.0 - dog a 2.0 NaN - b NaN 3.0 + height weight + cat kg NaN 1.0 + m 2.0 NaN + dog kg NaN 3.0 + m 4.0 NaN **Prescribing the level(s) to be stacked** The first parameter controls which level or levels are stacked: >>> df_multi_level_cols2.stack(0) - a b - cat X 0.0 NaN - Y NaN 1.0 - dog X 2.0 NaN - Y NaN 3.0 + kg m + cat height NaN 2.0 + weight 1.0 NaN + dog height NaN 4.0 + weight 3.0 NaN >>> df_multi_level_cols2.stack([0, 1]) - cat X a 0.0 - Y b 1.0 - dog X a 2.0 - Y b 3.0 + cat height m 2.0 + weight kg 1.0 + dog height m 4.0 + weight kg 3.0 dtype: float64 **Dropping missing values** @@ -5287,21 +5287,21 @@ def stack(self, level=-1, dropna=True): keyword parameter: >>> df_multi_level_cols3 - X Y - a b - cat NaN 1.0 - dog 2.0 3.0 + weight height + kg m + cat NaN 1.0 + dog 2.0 3.0 >>> df_multi_level_cols3.stack(dropna=False) - X Y - cat a NaN NaN - b NaN 1.0 - dog a 2.0 NaN - b NaN 3.0 + height weight + cat kg NaN NaN + m 1.0 NaN + dog kg NaN 2.0 + m 3.0 NaN >>> df_multi_level_cols3.stack(dropna=True) - X Y - cat b NaN 1.0 - dog a 2.0 NaN 
- b NaN 3.0 + height weight + cat m 1.0 NaN + dog kg NaN 2.0 + m 3.0 NaN """ from pandas.core.reshape.reshape import stack, stack_multiple From 747d245f8708776b250383a06351c91958d4c4c2 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 25 Mar 2018 12:57:00 +0100 Subject: [PATCH 22/25] Shorten overly long lines in examples. --- pandas/core/frame.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 29bedfb61c633..ca5cd3c36bdb1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5216,7 +5216,8 @@ def stack(self, level=-1, dropna=True): **Multi level columns: simple case** - >>> multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'), ('weight', 'pounds')]) + >>> multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'), + ... ('weight', 'pounds')]) >>> df_multi_level_cols1 = pd.DataFrame([[1, 2], [2, 4]], ... index=['cat', 'dog'], ... columns=multicol1) @@ -5237,7 +5238,8 @@ def stack(self, level=-1, dropna=True): **Missing values** - >>> multicol2 = pd.MultiIndex.from_tuples([('weight', 'kg'), ('height', 'm')]) + >>> multicol2 = pd.MultiIndex.from_tuples([('weight', 'kg'), + ... ('height', 'm')]) >>> df_multi_level_cols2 = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], ... index=['cat', 'dog'], ... columns=multicol2) From a2c9b1a6b71feda6e413fc42be4b5daf262c55fb Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 25 Mar 2018 12:57:47 +0100 Subject: [PATCH 23/25] Shorter one line description. --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ca5cd3c36bdb1..2a3359d2dd2e6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5145,8 +5145,7 @@ def pivot_table(self, values=None, index=None, columns=None, def stack(self, level=-1, dropna=True): """ - Stack the prescribed level(s) from the column axis onto the index - axis. + Stack the prescribed level(s) from columns to index. 
 Return a reshaped DataFrame or Series having a multi-level index with one or more new inner-most levels compared to the current From d34732d4ae9401b3c761665702eec5c92272a3fc Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 25 Mar 2018 13:01:30 +0100 Subject: [PATCH 24/25] Better description in the notes section. --- pandas/core/frame.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2a3359d2dd2e6..1be9de29c7c48 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5188,9 +5188,11 @@ def stack(self, level=-1, dropna=True): Notes ----- - The function is named by analogy with a stack of books - (levels) being re-organised from a horizontal position (column - levels) to a vertical position (index levels). + The function is named by analogy with a collection of books + being re-organised from being side by side on a horizontal + position (the columns of the dataframe) to being stacked + vertically on top of each other (in the index of the + dataframe). Examples -------- From 5bc794c8dee2e5189dc17078cb4367caf63f10f8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Mar 2018 08:05:37 -0500 Subject: [PATCH 25/25] Formatting [ci skip] [ci skip] --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1be9de29c7c48..51394c47468cd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5149,7 +5149,7 @@ def stack(self, level=-1, dropna=True): Return a reshaped DataFrame or Series having a multi-level index with one or more new inner-most levels compared to the current - dataframe. The new inner-most levels are created by pivoting the + DataFrame. 
The new inner-most levels are created by pivoting the columns of the current dataframe: - if the columns have a single level, the output is a Series; @@ -5220,8 +5220,8 @@ def stack(self, level=-1, dropna=True): >>> multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'), ... ('weight', 'pounds')]) >>> df_multi_level_cols1 = pd.DataFrame([[1, 2], [2, 4]], - ... index=['cat', 'dog'], - ... columns=multicol1) + ... index=['cat', 'dog'], + ... columns=multicol1) Stacking a dataframe with a multi-level column axis: