From 42cb9dd6da3aac93f3db004a4a3d1f206984de7d Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Tue, 24 Jan 2023 16:28:58 +0100 Subject: [PATCH 1/6] add documentation --- pandas/core/groupby/generic.py | 60 ++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2340c36d14301..5c16e7375adc2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1001,7 +1001,6 @@ def take( result = self._op_via_apply("take", indices=indices, axis=axis, **kwargs) return result - @doc(Series.skew.__doc__) def skew( self, axis: Axis | lib.NoDefault = lib.no_default, @@ -1009,6 +1008,35 @@ def skew( numeric_only: bool = False, **kwargs, ) -> Series: + """ + Return unbiased skew within groups.\n\nNormalized by N-1. + + Parameters + ---------- + axis : {axis_descr} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. Not implemented for Series. + + {min_count}\ + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + {name1} or scalar\ + {see_also}\ + {examples} + """ result = self._op_via_apply( "skew", axis=axis, @@ -2470,7 +2498,6 @@ def take( result = self._op_via_apply("take", indices=indices, axis=axis, **kwargs) return result - @doc(DataFrame.skew.__doc__) def skew( self, axis: Axis | None | lib.NoDefault = lib.no_default, @@ -2478,6 +2505,35 @@ def skew( numeric_only: bool = False, **kwargs, ) -> DataFrame: + """ + Return unbiased skew within groups.\n\nNormalized by N-1. + + Parameters + ---------- + axis : {axis_descr} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. Not implemented for Series. + + {min_count}\ + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + {name1} or scalar\ + {see_also}\ + {examples} + """ result = self._op_via_apply( "skew", axis=axis, From 032fa5f4601a7bd75f0529a4199bee890817741f Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Wed, 1 Feb 2023 19:13:14 +0100 Subject: [PATCH 2/6] apply requested changes --- pandas/core/groupby/generic.py | 91 ++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 16 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5c16e7375adc2..1dad9d18996dc 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1009,33 +1009,58 @@ def skew( **kwargs, ) -> Series: """ - Return unbiased skew within groups.\n\nNormalized by N-1. + Return unbiased skew within groups. + + Normalized by N-1. Parameters ---------- - axis : {axis_descr} + axis : {0 or 'index', 1 or 'columns', None}, default 0 Axis for the function to be applied on. For `Series` this parameter is unused and defaults to 0. For DataFrames, specifying ``axis=None`` will apply the aggregation across both axes. - .. versionadded:: 2.0.0 - skipna : bool, default True Exclude NA/null values when computing the result. + numeric_only : bool, default False Include only float, int, boolean columns. Not implemented for Series. - {min_count}\ **kwargs Additional keyword arguments to be passed to the function. Returns ------- - {name1} or scalar\ - {see_also}\ - {examples} + scalar or scalar + + See Also + -------- + Series.skew : Return unbiased skew over requested axis. + + Examples + -------- + >>> ser = pd.Series([390., 350., 357., np.nan, 22., 20., 30.], + ... index=['Falcon', 'Falcon', 'Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'], + ... name="Max Speed") + >>> ser + Falcon 390.0 + Falcon 350.0 + Falcon 357.0 + Falcon NaN + Parrot 22.0 + Parrot 20.0 + Parrot 30.0 + Name: Max Speed, dtype: float64 + >>> ser.groupby(level=0).skew() + Falcon 1.525174 + Parrot 1.457863 + Name: Max Speed, dtype: float64 + >>> ser.groupby(level=0).skew(skipna=False) + Falcon NaN + Parrot 1.457863 + Name: Max Speed, dtype: float64 """ result = self._op_via_apply( "skew", @@ -2506,13 +2531,14 @@ def skew( **kwargs, ) -> DataFrame: """ - Return unbiased skew within groups.\n\nNormalized by N-1. + Return unbiased skew within groups. + + Normalized by N-1. Parameters ---------- - axis : {axis_descr} + axis : {0 or 'index', 1 or 'columns', None}, default 0 Axis for the function to be applied on. - For `Series` this parameter is unused and defaults to 0. For DataFrames, specifying ``axis=None`` will apply the aggregation across both axes. @@ -2521,18 +2547,51 @@ def skew( skipna : bool, default True Exclude NA/null values when computing the result. + numeric_only : bool, default False - Include only float, int, boolean columns. Not implemented for Series. + Include only float, int, boolean columns. - {min_count}\ **kwargs Additional keyword arguments to be passed to the function. Returns ------- - {name1} or scalar\ - {see_also}\ - {examples} + Series or scalar + + See Also + -------- + DataFrame.skew : Return unbiased skew over requested axis. + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('cockatoo', 'bird', 70.0), + ... ('kiwi', 'bird', np.nan), + ... ('lion', 'mammal', 80.5), + ... ('monkey', 'mammal', 21.5), + ... ('rabbit', 'mammal', 15.0)], + ... columns=['name', 'class', 'max_speed']) + >>> df + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 cockatoo bird 70.0 + 3 kiwi bird NaN + 4 lion mammal 80.5 + 5 monkey mammal 21.5 + 6 rabbit mammal 15.0 + >>> gb = df.groupby(["class"]) + >>> gb.skew() + max_speed + class + bird 1.628296 + mammal 1.669046 + >>> gb.skew(skipna=False) + max_speed + class + bird NaN + mammal 1.669046 """ result = self._op_via_apply( "skew", From 97823a6a74f4cc51659da1b5d1099d09e97e0840 Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Wed, 1 Feb 2023 19:27:29 +0100 Subject: [PATCH 3/6] correction --- pandas/core/groupby/generic.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1dad9d18996dc..b2fef5cbb9870 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1019,9 +1019,6 @@ def skew( Axis for the function to be applied on. For `Series` this parameter is unused and defaults to 0. - For DataFrames, specifying ``axis=None`` will apply the aggregation - across both axes. - skipna : bool, default True Exclude NA/null values when computing the result. From 7ace24e8adc7ae03ce9acdbcadaf3ef42a6f2960 Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Wed, 1 Feb 2023 19:45:50 +0100 Subject: [PATCH 4/6] fix line too long error --- pandas/core/groupby/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b2fef5cbb9870..1f1e94863dff8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1039,7 +1039,8 @@ def skew( Examples -------- >>> ser = pd.Series([390., 350., 357., np.nan, 22., 20., 30.], - ... index=['Falcon', 'Falcon', 'Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'], + ... index=['Falcon', 'Falcon', 'Falcon', 'Falcon', \ + ... 'Parrot', 'Parrot', 'Parrot'], ... name="Max Speed") >>> ser Falcon 390.0 From 3e1bc248421a25350c55489a4fe129877cc72505 Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Thu, 2 Feb 2023 21:55:50 +0100 Subject: [PATCH 5/6] apply requested changes vol2 --- pandas/core/groupby/generic.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1f1e94863dff8..a1edd42578efe 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1017,7 +1017,7 @@ def skew( ---------- axis : {0 or 'index', 1 or 'columns', None}, default 0 Axis for the function to be applied on. - For `Series` this parameter is unused and defaults to 0. + This parameter is only for compatibility with DataFrame and is unused. skipna : bool, default True Exclude NA/null values when computing the result. @@ -1030,7 +1030,7 @@ def skew( Returns ------- - scalar or scalar + Series See Also -------- @@ -2538,8 +2538,7 @@ def skew( axis : {0 or 'index', 1 or 'columns', None}, default 0 Axis for the function to be applied on. - For DataFrames, specifying ``axis=None`` will apply the aggregation - across both axes. + Specifying ``axis=None`` will apply the aggregation across both axes. .. versionadded:: 2.0.0 @@ -2554,7 +2553,7 @@ def skew( Returns ------- - Series or scalar + DataFrame See Also -------- From 2a413ac848eb5871b4e414cf5f8da7db4692e97d Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Fri, 3 Feb 2023 21:29:11 +0100 Subject: [PATCH 6/6] address ci failures --- pandas/core/groupby/generic.py | 35 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a1edd42578efe..852c083290d55 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1039,7 +1039,7 @@ def skew( Examples -------- >>> ser = pd.Series([390., 350., 357., np.nan, 22., 20., 30.], - ... index=['Falcon', 'Falcon', 'Falcon', 'Falcon', \ + ... index=['Falcon', 'Falcon', 'Falcon', 'Falcon', ... 'Parrot', 'Parrot', 'Parrot'], ... name="Max Speed") >>> ser @@ -2561,23 +2561,24 @@ def skew( Examples -------- - >>> df = pd.DataFrame([('falcon', 'bird', 389.0), - ... ('parrot', 'bird', 24.0), - ... ('cockatoo', 'bird', 70.0), - ... ('kiwi', 'bird', np.nan), - ... ('lion', 'mammal', 80.5), - ... ('monkey', 'mammal', 21.5), - ... ('rabbit', 'mammal', 15.0)], - ... columns=['name', 'class', 'max_speed']) + >>> arrays = [['falcon', 'parrot', 'cockatoo', 'kiwi', + ... 'lion', 'monkey', 'rabbit'], + ... ['bird', 'bird', 'bird', 'bird', + ... 'mammal', 'mammal', 'mammal']] + >>> index = pd.MultiIndex.from_arrays(arrays, names=('name', 'class')) + >>> df = pd.DataFrame({'max_speed': [389.0, 24.0, 70.0, np.nan, + ... 80.5, 21.5, 15.0]}, + ... index=index) >>> df - name class max_speed - 0 falcon bird 389.0 - 1 parrot bird 24.0 - 2 cockatoo bird 70.0 - 3 kiwi bird NaN - 4 lion mammal 80.5 - 5 monkey mammal 21.5 - 6 rabbit mammal 15.0 + max_speed + name class + falcon bird 389.0 + parrot bird 24.0 + cockatoo bird 70.0 + kiwi bird NaN + lion mammal 80.5 + monkey mammal 21.5 + rabbit mammal 15.0 >>> gb = df.groupby(["class"]) >>> gb.skew() max_speed