From 0e0c720e05e6cdd9f71a6bd9a49f6149260c9f9a Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sat, 4 May 2024 15:11:31 +0530 Subject: [PATCH 1/4] DOC: add PR01,RT03,SA01 for pandas.DataFrame.std --- pandas/core/frame.py | 76 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 96943eb71c7bd..5a817d76218a7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12116,7 +12116,6 @@ def std( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std") - @doc(make_doc("std", ndim=2)) def std( self, axis: Axis | None = 0, @@ -12125,6 +12124,81 @@ def std( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return sample standard deviation over requested axis. + + Normalized by N-1 by default. This can be changed using the ddof argument. + + Parameters + ---------- + axis : {index (0), columns (1)} + For `Series` this parameter is unused and defaults to 0. + + .. warning:: + + The behavior of DataFrame.std with ``axis=None`` is deprecated, + in a future version this will reduce over both axes and return a scalar. + To retain the old behavior, pass axis=0 (or do not pass axis). + + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + numeric_only : bool, default False + Include only float, int, boolean columns. Not implemented for Series. + **kwargs : dict + Parameters to accommodate numpy arguments. + + Returns + ------- + Series or DataFrame (if level specified) + Standard deviation over requested axis. + + See Also + -------- + DataFrame.mean : Return the mean of the values over the requested axis. + DataFrame.median : Return the median of the values over the requested axis. 
+ DataFrame.mode : Get the mode(s) of each element along the requested axis. + DataFrame.sum : Return the sum of the values over the requested axis. + + Notes + ----- + To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the + default `ddof=1`). + + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "person_id": [0, 1, 2, 3], + ... "age": [21, 25, 62, 43], + ... "height": [1.61, 1.87, 1.49, 2.01], + ... } + ... ).set_index("person_id") + >>> df + age height + person_id + 0 21 1.61 + 1 25 1.87 + 2 62 1.49 + 3 43 2.01 + + The standard deviation of the columns can be found as follows: + + >>> df.std() + age 18.786076 + height 0.237417 + dtype: float64 + + Alternatively, `ddof=0` can be set to normalize by N instead of N-1: + + >>> df.std(ddof=0) + age 16.269219 + height 0.205609 + dtype: float64 + """ result = super().std( axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs ) From 2cddb299cf6b763915212eb80e32533c2c201e3b Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sat, 4 May 2024 15:15:56 +0530 Subject: [PATCH 2/4] DOC: remove PR01,RT03,SA01 for pandas.DataFrame.std --- ci/code_checks.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 936e3664cfe93..492ac5a8af620 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -75,7 +75,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.median RT03,SA01" \ -i "pandas.DataFrame.min RT03" \ -i "pandas.DataFrame.plot PR02,SA01" \ - -i "pandas.DataFrame.std PR01,RT03,SA01" \ -i "pandas.DataFrame.sum RT03" \ -i "pandas.DataFrame.swaplevel SA01" \ -i "pandas.DataFrame.to_markdown SA01" \ From f0a94ae6a334b4e042394b3d3e95cbfff0e0f388 Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sat, 4 May 2024 15:29:53 +0530 Subject: [PATCH 3/4] DOC: add PR01,RT03,SA01 for pandas.DataFrame.std --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py 
b/pandas/core/frame.py index 5a817d76218a7..7cc04786489d1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12149,7 +12149,7 @@ def std( numeric_only : bool, default False Include only float, int, boolean columns. Not implemented for Series. **kwargs : dict - Parameters to accommodate numpy arguments. + Additional keyword arguments to be passed to the function. Returns ------- From 5663a7327d6440bd14ebba03561e817e468823ed Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sat, 4 May 2024 15:43:23 +0530 Subject: [PATCH 4/4] DOC: add Series.std for pandas.DataFrame.std --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7cc04786489d1..215dc920a7f2c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12153,11 +12153,12 @@ def std( Returns ------- - Series or DataFrame (if level specified) + Series or scalar Standard deviation over requested axis. See Also -------- + Series.std : Return standard deviation over Series values. DataFrame.mean : Return the mean of the values over the requested axis. DataFrame.median : Return the median of the values over the requested axis. DataFrame.mode : Get the mode(s) of each element along the requested axis.