From 73bd4686b094309548fc1ee9eb763b4058715676 Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Wed, 5 Jun 2024 21:05:53 +0530 Subject: [PATCH] DOC: fix PR07 for pandas.pivot --- ci/code_checks.sh | 1 - pandas/core/reshape/pivot.py | 153 +++++++++++++++++++++++++++++++++-- 2 files changed, 146 insertions(+), 8 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 039700f306e03..4fb4065982ba8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -469,7 +469,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.merge_asof PR07,RT03" \ -i "pandas.merge_ordered PR07" \ -i "pandas.period_range RT03,SA01" \ - -i "pandas.pivot PR07" \ -i "pandas.plotting.andrews_curves RT03,SA01" \ -i "pandas.plotting.lag_plot RT03,SA01" \ -i "pandas.plotting.scatter_matrix PR07,SA01" \ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 86da19f13bacf..ff993c039bf9a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -11,10 +11,6 @@ import numpy as np from pandas._libs import lib -from pandas.util._decorators import ( - Appender, - Substitution, -) from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( @@ -29,7 +25,6 @@ ) import pandas.core.common as com -from pandas.core.frame import _shared_docs from pandas.core.groupby import Grouper from pandas.core.indexes.api import ( Index, @@ -656,8 +651,6 @@ def _convert_by(by): return by -@Substitution("\ndata : DataFrame") -@Appender(_shared_docs["pivot"], indents=1) def pivot( data: DataFrame, *, @@ -665,6 +658,152 @@ def pivot( index: IndexLabel | lib.NoDefault = lib.no_default, values: IndexLabel | lib.NoDefault = lib.no_default, ) -> DataFrame: + """ + Return reshaped DataFrame organized by given index / column values. + + Reshape data (produce a "pivot" table) based on column values. Uses + unique values from specified `index` / `columns` to form axes of the + resulting DataFrame. This function does not support data + aggregation, multiple values will result in a MultiIndex in the + columns. See the :ref:`User Guide ` for more on reshaping. + + Parameters + ---------- + data : DataFrame + Input pandas DataFrame object. + columns : str or object or a list of str + Column to use to make new frame's columns. + index : str or object or a list of str, optional + Column to use to make new frame's index. If not given, uses existing index. + values : str, object or a list of the previous, optional + Column(s) to use for populating new frame's values. If not + specified, all remaining columns will be used and the result will + have hierarchically indexed columns. + + Returns + ------- + DataFrame + Returns reshaped DataFrame. + + Raises + ------ + ValueError: + When there are any `index`, `columns` combinations with multiple + values. `DataFrame.pivot_table` when you need to aggregate. + + See Also + -------- + DataFrame.pivot_table : Generalization of pivot that can handle + duplicate values for one index/column pair. + DataFrame.unstack : Pivot based on the index values instead of a + column. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Notes + ----- + For finer-tuned control, see hierarchical indexing documentation along + with the related stack/unstack methods. + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "foo": ["one", "one", "one", "two", "two", "two"], + ... "bar": ["A", "B", "C", "A", "B", "C"], + ... "baz": [1, 2, 3, 4, 5, 6], + ... "zoo": ["x", "y", "z", "q", "w", "t"], + ... } + ... ) + >>> df + foo bar baz zoo + 0 one A 1 x + 1 one B 2 y + 2 one C 3 z + 3 two A 4 q + 4 two B 5 w + 5 two C 6 t + + >>> df.pivot(index="foo", columns="bar", values="baz") + bar A B C + foo + one 1 2 3 + two 4 5 6 + + >>> df.pivot(index="foo", columns="bar")["baz"] + bar A B C + foo + one 1 2 3 + two 4 5 6 + + >>> df.pivot(index="foo", columns="bar", values=["baz", "zoo"]) + baz zoo + bar A B C A B C + foo + one 1 2 3 x y z + two 4 5 6 q w t + + You could also assign a list of column names or a list of index names. + + >>> df = pd.DataFrame( + ... { + ... "lev1": [1, 1, 1, 2, 2, 2], + ... "lev2": [1, 1, 2, 1, 1, 2], + ... "lev3": [1, 2, 1, 2, 1, 2], + ... "lev4": [1, 2, 3, 4, 5, 6], + ... "values": [0, 1, 2, 3, 4, 5], + ... } + ... ) + >>> df + lev1 lev2 lev3 lev4 values + 0 1 1 1 1 0 + 1 1 1 2 2 1 + 2 1 2 1 3 2 + 3 2 1 2 4 3 + 4 2 1 1 5 4 + 5 2 2 2 6 5 + + >>> df.pivot(index="lev1", columns=["lev2", "lev3"], values="values") + lev2 1 2 + lev3 1 2 1 2 + lev1 + 1 0.0 1.0 2.0 NaN + 2 4.0 3.0 NaN 5.0 + + >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"], values="values") + lev3 1 2 + lev1 lev2 + 1 1 0.0 1.0 + 2 2.0 NaN + 2 1 4.0 3.0 + 2 NaN 5.0 + + A ValueError is raised if there are any duplicates. + + >>> df = pd.DataFrame( + ... { + ... "foo": ["one", "one", "two", "two"], + ... "bar": ["A", "A", "B", "C"], + ... "baz": [1, 2, 3, 4], + ... } + ... ) + >>> df + foo bar baz + 0 one A 1 + 1 one A 2 + 2 two B 3 + 3 two C 4 + + Notice that the first two rows are the same for our `index` + and `columns` arguments. + + >>> df.pivot(index="foo", columns="bar", values="baz") + Traceback (most recent call last): + ... + ValueError: Index contains duplicate entries, cannot reshape + """ columns_listlike = com.convert_to_list_like(columns) # If columns is None we will create a MultiIndex level with None as name