From 916624c4aedb9a20d063dca69800567ef7908e1c Mon Sep 17 00:00:00 2001 From: jain_nehil Date: Sat, 10 Mar 2018 12:03:55 -0500 Subject: [PATCH 1/5] DOC: Improve the docstring of DataFrame.describe() --- pandas/core/generic.py | 73 +++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f2112729a503..72e0acf6529f3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7179,7 +7179,7 @@ def abs(self): def describe(self, percentiles=None, include=None, exclude=None): """ - Generates descriptive statistics that summarize the central tendency, + Generate descriptive statistics that summarize the central tendency, dispersion and shape of a dataset's distribution, excluding ``NaN`` values. @@ -7267,6 +7267,7 @@ def describe(self, percentiles=None, include=None, exclude=None): 50% 2.0 75% 2.5 max 3.0 + dtype: float64 Describing a categorical ``Series``. @@ -7315,18 +7316,18 @@ def describe(self, percentiles=None, include=None, exclude=None): Describing all columns of a ``DataFrame`` regardless of data type. >>> df.describe(include='all') - categorical numeric object - count 3 3.0 3 - unique 3 NaN 3 - top f NaN c - freq 1 NaN 1 - mean NaN 2.0 NaN - std NaN 1.0 NaN - min NaN 1.0 NaN - 25% NaN 1.5 NaN - 50% NaN 2.0 NaN - 75% NaN 2.5 NaN - max NaN 3.0 NaN + object numeric categorical + count 3 3.0 3 + unique 3 NaN 3 + top c NaN f + freq 1 NaN 1 + mean NaN 2.0 NaN + std NaN 1.0 NaN + min NaN 1.0 NaN + 25% NaN 1.5 NaN + 50% NaN 2.0 NaN + 75% NaN 2.5 NaN + max NaN 3.0 NaN Describing a column from a ``DataFrame`` by accessing it as an attribute. @@ -7376,36 +7377,36 @@ def describe(self, percentiles=None, include=None, exclude=None): Excluding numeric columns from a ``DataFrame`` description. 
>>> df.describe(exclude=[np.number]) - categorical object - count 3 3 - unique 3 3 - top f c - freq 1 1 + object categorical + count 3 3 + unique 3 3 + top c f + freq 1 1 Excluding object columns from a ``DataFrame`` description. >>> df.describe(exclude=[np.object]) - categorical numeric - count 3 3.0 - unique 3 NaN - top f NaN - freq 1 NaN - mean NaN 2.0 - std NaN 1.0 - min NaN 1.0 - 25% NaN 1.5 - 50% NaN 2.0 - 75% NaN 2.5 - max NaN 3.0 + numeric categorical + count 3.0 3 + unique NaN 3 + top NaN f + freq NaN 1 + mean 2.0 NaN + std 1.0 NaN + min 1.0 NaN + 25% 1.5 NaN + 50% 2.0 NaN + 75% 2.5 NaN + max 3.0 NaN See Also -------- - DataFrame.count - DataFrame.max - DataFrame.min - DataFrame.mean - DataFrame.std - DataFrame.select_dtypes + DataFrame.count : Count number of non-NA/null observations + DataFrame.max : Maximum of the values in the object + DataFrame.min : Minimum of the values in the object + DataFrame.mean : Mean of the values + DataFrame.std : Standard deviation of the obersvations + DataFrame.select_dtypes : Subset of a DataFrame including/excluding columns based on their dtype """ if self.ndim >= 3: msg = "describe is not implemented on Panel objects." From d36509859304a4f5032c7eb4b7ca662415c12a3f Mon Sep 17 00:00:00 2001 From: jain_nehil Date: Sun, 11 Mar 2018 15:13:18 -0400 Subject: [PATCH 2/5] fixed order of columns --- pandas/core/generic.py | 62 +++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 72e0acf6529f3..c4af950f88ce7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7298,9 +7298,9 @@ def describe(self, percentiles=None, include=None, exclude=None): Describing a ``DataFrame``. By default only numeric fields are returned. - >>> df = pd.DataFrame({ 'object': ['a', 'b', 'c'], + >>> df = pd.DataFrame({ 'categorical': pd.Categorical(['d','e','f']), ... 'numeric': [1, 2, 3], - ... 
'categorical': pd.Categorical(['d','e','f']) + ... 'object': ['a', 'b', 'c'] ... }) >>> df.describe() numeric @@ -7316,18 +7316,18 @@ def describe(self, percentiles=None, include=None, exclude=None): Describing all columns of a ``DataFrame`` regardless of data type. >>> df.describe(include='all') - object numeric categorical - count 3 3.0 3 - unique 3 NaN 3 - top c NaN f - freq 1 NaN 1 - mean NaN 2.0 NaN - std NaN 1.0 NaN - min NaN 1.0 NaN - 25% NaN 1.5 NaN - 50% NaN 2.0 NaN - 75% NaN 2.5 NaN - max NaN 3.0 NaN + categorical numeric object + count 3 3.0 3 + unique 3 NaN 3 + top f NaN c + freq 1 NaN 1 + mean NaN 2.0 NaN + std NaN 1.0 NaN + min NaN 1.0 NaN + 25% NaN 1.5 NaN + 50% NaN 2.0 NaN + 75% NaN 2.5 NaN + max NaN 3.0 NaN Describing a column from a ``DataFrame`` by accessing it as an attribute. @@ -7377,27 +7377,27 @@ def describe(self, percentiles=None, include=None, exclude=None): Excluding numeric columns from a ``DataFrame`` description. >>> df.describe(exclude=[np.number]) - object categorical - count 3 3 - unique 3 3 - top c f - freq 1 1 + categorical object + count 3 3 + unique 3 3 + top f c + freq 1 1 Excluding object columns from a ``DataFrame`` description. 
>>> df.describe(exclude=[np.object]) - numeric categorical - count 3.0 3 - unique NaN 3 - top NaN f - freq NaN 1 - mean 2.0 NaN - std 1.0 NaN - min 1.0 NaN - 25% 1.5 NaN - 50% 2.0 NaN - 75% 2.5 NaN - max 3.0 NaN + categorical numeric + count 3 3.0 + unique 3 NaN + top f NaN + freq 1 NaN + mean NaN 2.0 + std NaN 1.0 + min NaN 1.0 + 25% NaN 1.5 + 50% NaN 2.0 + 75% NaN 2.5 + max NaN 3.0 See Also -------- From aa13b25028264b0e6fd3acf4866e8b20e2836b6b Mon Sep 17 00:00:00 2001 From: jain_nehil Date: Sun, 11 Mar 2018 15:27:10 -0400 Subject: [PATCH 3/5] more comments incorporated --- pandas/core/generic.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 26e316677febe..ef1a9dfc76f3c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7261,6 +7261,16 @@ def describe(self, percentiles=None, include=None, exclude=None): which columns in a ``DataFrame`` are analyzed for the output. The parameters are ignored when analyzing a ``Series``. + See Also + -------- + DataFrame.count: Count number of non-NA/null observations. + DataFrame.max: Maximum of the values in the object. + DataFrame.min: Minimum of the values in the object. + DataFrame.mean: Mean of the values. + DataFrame.std: Standard deviation of the obersvations. + DataFrame.select_dtypes: Subset of a DataFrame including/excluding + columns based on their dtype. + Examples -------- Describing a numeric ``Series``. 
@@ -7406,15 +7416,6 @@ def describe(self, percentiles=None, include=None, exclude=None): 50% NaN 2.0 75% NaN 2.5 max NaN 3.0 - - See Also - -------- - DataFrame.count : Count number of non-NA/null observations - DataFrame.max : Maximum of the values in the object - DataFrame.min : Minimum of the values in the object - DataFrame.mean : Mean of the values - DataFrame.std : Standard deviation of the obersvations - DataFrame.select_dtypes : Subset of a DataFrame including/excluding columns based on their dtype """ if self.ndim >= 3: msg = "describe is not implemented on Panel objects." From 8da3c9a759043ba925f11a5f680cce459b54decb Mon Sep 17 00:00:00 2001 From: jain_nehil Date: Sun, 11 Mar 2018 15:48:16 -0400 Subject: [PATCH 4/5] return documentation changed and see also moved to section 5 as per documentation conventions --- pandas/core/generic.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ef1a9dfc76f3c..b31c1caab7a59 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7231,7 +7231,18 @@ def describe(self, percentiles=None, include=None, exclude=None): Returns ------- - summary: Series/DataFrame of summary statistics + Series or DataFrame + Summary statistics of the Series or DataFrame provided. + + See Also + -------- + DataFrame.count: Count number of non-NA/null observations. + DataFrame.max: Maximum of the values in the object. + DataFrame.min: Minimum of the values in the object. + DataFrame.mean: Mean of the values. + DataFrame.std: Standard deviation of the observations. + DataFrame.select_dtypes: Subset of a DataFrame including/excluding + columns based on their dtype. Notes ----- @@ -7261,16 +7272,6 @@ def describe(self, percentiles=None, include=None, exclude=None): which columns in a ``DataFrame`` are analyzed for the output. The parameters are ignored when analyzing a ``Series``.
- See Also - -------- - DataFrame.count: Count number of non-NA/null observations. - DataFrame.max: Maximum of the values in the object. - DataFrame.min: Minimum of the values in the object. - DataFrame.mean: Mean of the values. - DataFrame.std: Standard deviation of the obersvations. - DataFrame.select_dtypes: Subset of a DataFrame including/excluding - columns based on their dtype. - Examples -------- Describing a numeric ``Series``. From 12778602d64466a3735699486129f769fcb782ab Mon Sep 17 00:00:00 2001 From: jain_nehil Date: Wed, 21 Mar 2018 16:11:47 -0400 Subject: [PATCH 5/5] missed a pep-8 related comment --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b31c1caab7a59..3a604f959eac5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7317,9 +7317,9 @@ def describe(self, percentiles=None, include=None, exclude=None): Describing a ``DataFrame``. By default only numeric fields are returned. - >>> df = pd.DataFrame({ 'categorical': pd.Categorical(['d','e','f']), - ... 'numeric': [1, 2, 3], - ... 'object': ['a', 'b', 'c'] + >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']), + ... 'numeric': [1, 2, 3], + ... 'object': ['a', 'b', 'c'] ... }) >>> df.describe() numeric