From b44c61854fd0a4d57db035826ad4ad4d26203f98 Mon Sep 17 00:00:00 2001 From: Catherine Chen Date: Tue, 1 May 2018 20:49:10 +0100 Subject: [PATCH 1/7] added examples to groupby-mean --- pandas/core/groupby/groupby.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8613ab4d8c59d..ec799dc534152 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1266,8 +1266,34 @@ def count(self): def mean(self, *args, **kwargs): """ Compute mean of groups, excluding missing values + + Example of groupby one column: + -------- + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) + >>> g = df.groupby('A')['B'].mean() + >>> g + A + 1 3.0 + 2 4.0 + For multiple groupings, the result index will be a MultiIndex + + Example of groupby multiple columns: + -------- + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + 'B': [np.nan, 2, 3, 4, 5], + 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) + >>> g = df.groupby(['A', 'C'])['B'].mean() + >>> g + A C + 1 1 4.0 + 2 2.0 + 2 1 3.0 + 2 5.0 + + """ nv.validate_groupby_func('mean', args, kwargs, ['numeric_only']) try: From f339b2cb795d25193d255379bbf52722b76f8b58 Mon Sep 17 00:00:00 2001 From: Catherine Chen Date: Tue, 1 May 2018 21:08:23 +0100 Subject: [PATCH 2/7] added what is returned --- pandas/core/groupby/groupby.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ec799dc534152..62d2daad0dae5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1268,7 +1268,7 @@ def mean(self, *args, **kwargs): Compute mean of groups, excluding missing values Example of groupby one column: - -------- + ------------------------------ >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) >>> g = df.groupby('A')['B'].mean() @@ 
-1281,7 +1281,7 @@ def mean(self, *args, **kwargs): For multiple groupings, the result index will be a MultiIndex Example of groupby multiple columns: - -------- + ------------------------------------ >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], 'B': [np.nan, 2, 3, 4, 5], 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) @@ -1293,6 +1293,12 @@ def mean(self, *args, **kwargs): 2 1 3.0 2 5.0 + + Returns + ------- + pandas.core.series.Series + The average of the target column ('B' in the examples above) grouped by the groupby columns ('A' and ['A', 'C'] + in the examples above) """ nv.validate_groupby_func('mean', args, kwargs, ['numeric_only']) From 29756b7be7238d92205f42d9626d78cf6d9a6b6d Mon Sep 17 00:00:00 2001 From: Catherine Chen Date: Wed, 2 May 2018 10:24:38 +0100 Subject: [PATCH 3/7] adjust the sequence of descriptions by putting examples after explanation --- pandas/core/groupby/groupby.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 62d2daad0dae5..ce396fc922e81 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1267,6 +1267,14 @@ def mean(self, *args, **kwargs): """ Compute mean of groups, excluding missing values + For multiple groupings, the result index will be a MultiIndex + + Returns + ------- + pandas.core.series.Series + The average of the target column ('B' in the examples above) grouped by the groupby columns ('A' and ['A', 'C'] + in the examples above) + Example of groupby one column: ------------------------------ >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], @@ -1277,9 +1285,6 @@ def mean(self, *args, **kwargs): 1 3.0 2 4.0 - - For multiple groupings, the result index will be a MultiIndex - Example of groupby multiple columns: ------------------------------------ >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], @@ -1293,13 +1298,6 @@ def mean(self, *args, **kwargs): 2 1 3.0 2 5.0 - - Returns - ------- - 
pandas.core.series.Series - The average of the target column ('B' in the examples above) grouped by the groupby columns ('A' and ['A', 'C'] - in the examples above) - """ nv.validate_groupby_func('mean', args, kwargs, ['numeric_only']) try: From e71fbce586f54baa51a9d3dde2407f6237de8867 Mon Sep 17 00:00:00 2001 From: Catherine Chen Date: Tue, 8 May 2018 16:40:03 +0100 Subject: [PATCH 4/7] Created long summary and moved examples to the end and cleaned format --- pandas/core/groupby/groupby.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ce396fc922e81..63c7139c70190 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1265,16 +1265,19 @@ def count(self): @Appender(_doc_template) def mean(self, *args, **kwargs): """ - Compute mean of groups, excluding missing values - - For multiple groupings, the result index will be a MultiIndex + Compute mean of the target column for groups that are defined by the groupby columns. + Missing values are excluded in computing the mean. + If there is one groupby column, the groups are the unique values in the specified groupby column. + If there are multiple groupby columns, the groups are the unique combinations of the specified groupby columns. Returns ------- pandas.core.series.Series - The average of the target column ('B' in the examples above) grouped by the groupby columns ('A' and ['A', 'C'] - in the examples above) - + The average of the target column ('B' in the examples below) + grouped by the groupby columns ('A' and ['A', 'C'] in the examples below) + The groups are stored as index in the result. + If there are multiple groupby columns, the result index will be a MultiIndex. 
+ Example of groupby one column: ------------------------------ >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], @@ -1285,7 +1288,7 @@ def mean(self, *args, **kwargs): 1 3.0 2 4.0 - Example of groupby multiple columns: + Example of groupby multiple columns: ------------------------------------ >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], 'B': [np.nan, 2, 3, 4, 5], From e709f31f120b6ab07e8d426aba8d8be2f5a18926 Mon Sep 17 00:00:00 2001 From: Catherine Chen Date: Tue, 8 May 2018 16:57:46 +0100 Subject: [PATCH 5/7] Cleaned format and added ... to codes with continued lines --- pandas/core/groupby/groupby.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 63c7139c70190..5dc2a9dda8abf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1275,24 +1275,22 @@ def mean(self, *args, **kwargs): pandas.core.series.Series The average of the target column ('B' in the examples below) grouped by the groupby columns ('A' and ['A', 'C'] in the examples below) - The groups are stored as index in the result. - If there are multiple groupby columns, the result index will be a MultiIndex. - Example of groupby one column: - ------------------------------ + Example of groupby one column:the groups are stored as index in the result. + --------------------------------------------------------------------------- >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], - 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) + ... 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) >>> g = df.groupby('A')['B'].mean() >>> g A 1 3.0 2 4.0 - Example of groupby multiple columns: - ------------------------------------ + Example of groupby multiple columns: the result index will be a MultiIndex. 
+ --------------------------------------------------------------------------- >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], - 'B': [np.nan, 2, 3, 4, 5], - 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) + ... 'B': [np.nan, 2, 3, 4, 5], + ... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) >>> g = df.groupby(['A', 'C'])['B'].mean() >>> g A C From 31084a37dbb73c1ce4ad29e15b60b339046df1dd Mon Sep 17 00:00:00 2001 From: Catherine Chen Date: Tue, 15 May 2018 13:29:05 +0100 Subject: [PATCH 6/7] Added a blank line for extended summary. Improved description of examples. Changed dashlines to blank lines. --- pandas/core/groupby/groupby.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5dc2a9dda8abf..8ef24eb4d5da2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1265,19 +1265,20 @@ def count(self): @Appender(_doc_template) def mean(self, *args, **kwargs): """ + Compute mean of the target column for groups that are defined by the groupby columns. Missing values are excluded in computing the mean. If there is one groupby column, the groups are the unique values in the specified groupby column. If there are multiple groupby columns, the groups are the unique combinations of the specified groupby columns. Returns - ------- + pandas.core.series.Series The average of the target column ('B' in the examples below) grouped by the groupby columns ('A' and ['A', 'C'] in the examples below) - Example of groupby one column:the groups are stored as index in the result. - --------------------------------------------------------------------------- + Groubpy by one column. The result index are the group labels. + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], ... 
'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) >>> g = df.groupby('A')['B'].mean() @@ -1286,8 +1287,8 @@ def mean(self, *args, **kwargs): 1 3.0 2 4.0 - Example of groupby multiple columns: the result index will be a MultiIndex. - --------------------------------------------------------------------------- + Groubpy by multiple columns. + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], ... 'B': [np.nan, 2, 3, 4, 5], ... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) From 1eeedd706b16e0cd6acb290a30d2ec3fa4bb0240 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Jul 2018 15:31:36 -0500 Subject: [PATCH 7/7] formatting and other examples --- pandas/core/groupby/groupby.py | 59 +++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 95ff2a234c0ee..aa4c7452bcea9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1297,41 +1297,48 @@ def count(self): @Appender(_doc_template) def mean(self, *args, **kwargs): """ - - Compute mean of the target column for groups that are defined by the groupby columns. - Missing values are excluded in computing the mean. - If there is one groupby column, the groups are the unique values in the specified groupby column. - If there are multiple groupby columns, the groups are the unique combinations of the specified groupby columns. + Compute mean of groups, excluding missing values. Returns + ------- + pandas.Series or pandas.DataFrame - pandas.core.series.Series - The average of the target column ('B' in the examples below) - grouped by the groupby columns ('A' and ['A', 'C'] in the examples below) + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + ... 'B': [np.nan, 2, 3, 4, 5], + ... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) - Groubpy by one column. The result index are the group labels. 
+ Groupby one column and return the mean of the remaining columns in + each group. - >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], - ... 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) - >>> g = df.groupby('A')['B'].mean() - >>> g + >>> df.groupby('A').mean() + >>> + B C A - 1 3.0 - 2 4.0 + 1 3.0 1.333333 + 2 4.0 1.500000 - Groubpy by multiple columns. + Groupby two columns and return the mean of the remaining column. - >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], - ... 'B': [np.nan, 2, 3, 4, 5], - ... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) - >>> g = df.groupby(['A', 'C'])['B'].mean() - >>> g - A C - 1 1 4.0 - 2 2.0 - 2 1 3.0 - 2 5.0 + >>> df.groupby(['A', 'B']).mean() + >>> + C + A B + 1 2.0 2 + 4.0 1 + 2 3.0 1 + 5.0 2 + Groupby one column and return the mean of only a particular column in + the group. + + >>> df.groupby('A')['B'].mean() + >>> + A + 1 3.0 + 2 4.0 + Name: B, dtype: float64 """ nv.validate_groupby_func('mean', args, kwargs, ['numeric_only']) try: