From f93fc63a9d036ab00b3f9f68b174dfa34664a310 Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sat, 4 May 2024 17:47:01 +0530 Subject: [PATCH 1/3] DOC: add PR01,RT03,SA01 for pandas.DataFrame.var --- pandas/core/frame.py | 68 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 96943eb71c7bd..e7136c75608ee 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12065,7 +12065,6 @@ def var( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var") - @doc(make_doc("var", ndim=2)) def var( self, axis: Axis | None = 0, @@ -12074,6 +12073,73 @@ def var( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return unbiased variance over requested axis. + + Normalized by N-1 by default. This can be changed using the ddof argument. + + Parameters + ---------- + axis : {index (0), columns (1)} + For `Series` this parameter is unused and defaults to 0. + + .. warning:: + + The behavior of DataFrame.var with ``axis=None`` is deprecated, + in a future version this will reduce over both axes and return a scalar. + To retain the old behavior, pass axis=0 (or do not pass axis). + + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + numeric_only : bool, default False + Include only float, int, boolean columns. Not implemented for Series. + **kwargs : + Additional keywords passed. + + Returns + ------- + Series or scalar + Unbiased variance over requested axis. + + See Also + -------- + numpy.var : Equivalent function in NumPy. + DataFrame.std : Return the standard deviation of the values over + the requested axis.. + + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "person_id": [0, 1, 2, 3], + ...
"age": [21, 25, 62, 43], + ... "height": [1.61, 1.87, 1.49, 2.01], + ... } + ... ).set_index("person_id") + >>> df + age height + person_id + 0 21 1.61 + 1 25 1.87 + 2 62 1.49 + 3 43 2.01 + + >>> df.var() + age 352.916667 + height 0.056367 + dtype: float64 + + Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1: + + >>> df.var(ddof=0) + age 264.687500 + height 0.042275 + dtype: float64 + """ result = super().var( axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs ) From 1f1189cceb836bdfd8252429e8bcb4d36d0d8d65 Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sat, 4 May 2024 17:47:30 +0530 Subject: [PATCH 2/3] DOC: remove PR01,RT03,SA01 for pandas.DataFrame.var --- ci/code_checks.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 936e3664cfe93..db788c6830124 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -79,7 +79,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.sum RT03" \ -i "pandas.DataFrame.swaplevel SA01" \ -i "pandas.DataFrame.to_markdown SA01" \ - -i "pandas.DataFrame.var PR01,RT03,SA01" \ -i "pandas.Grouper PR02" \ -i "pandas.Index PR07" \ -i "pandas.Index.join PR07,RT03,SA01" \ From decfb78fd5e305d7bc8a2b5d0b1b2743bf6c4524 Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sun, 5 May 2024 07:44:21 +0530 Subject: [PATCH 3/3] DOC: add more examples --- pandas/core/frame.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e7136c75608ee..6b688d67c91b3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12108,8 +12108,10 @@ def var( See Also -------- numpy.var : Equivalent function in NumPy. - DataFrame.std : Return the standard deviation of the values over - the requested axis.. + Series.var : Return unbiased variance over Series values. + Series.std : Return standard deviation over Series values. 
+ DataFrame.std : Return standard deviation of the values over + the requested axis. Examples --------