From 9500ae42e8b15a3f490ea71352a2078dcb09ec2f Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 22 Aug 2020 15:17:37 +0100 Subject: [PATCH 1/4] REF: remove NDFrame._add_series_or_dataframe_operations --- pandas/core/frame.py | 1 - pandas/core/generic.py | 296 ++++++++--------------------------- pandas/core/series.py | 1 - pandas/core/shared_docs.py | 141 +++++++++++++++++ pandas/core/window/common.py | 2 +- 5 files changed, 210 insertions(+), 231 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 606bd4cc3b52d..95bd757f1994e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9306,7 +9306,6 @@ def _AXIS_NAMES(self) -> Dict[int, str]: DataFrame._add_numeric_operations() -DataFrame._add_series_or_dataframe_operations() ops.add_flex_arithmetic_methods(DataFrame) ops.add_special_arithmetic_methods(DataFrame) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fea3efedb6abb..d6e868d48c835 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6,7 +6,6 @@ import operator import pickle import re -from textwrap import dedent from typing import ( TYPE_CHECKING, Any, @@ -101,6 +100,7 @@ from pandas.core.missing import find_valid_index from pandas.core.ops import _align_method_FRAME from pandas.core.shared_docs import _shared_docs +from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window from pandas.io.formats import format as fmt from pandas.io.formats.format import DataFrameFormatter, format_percentiles @@ -109,9 +109,12 @@ if TYPE_CHECKING: from pandas.core.resample import Resampler from pandas.core.series import Series # noqa: F401 + from pandas.core.window.indexers import BaseIndexer + from pandas._libs.tslibs import BaseOffset # goal is to be able to define the docs close to function, while still being # able to share +_shared_docs = {**_shared_docs} _shared_doc_kwargs = dict( axes="keywords for axes", klass="Series/DataFrame", @@ -5124,54 +5127,9 @@ def pipe(self, func, *args, **kwargs): ... .pipe(g, arg1=a) ... .pipe((func, 'arg2'), arg1=a, arg3=c) ... ) # doctest: +SKIP - """ + """ return com.pipe(self, func, *args, **kwargs) - _shared_docs["aggregate"] = dedent( - """ - Aggregate using one or more operations over the specified axis. - {versionadded} - Parameters - ---------- - func : function, str, list or dict - Function to use for aggregating the data. If a function, must either - work when passed a {klass} or when passed to {klass}.apply. - - Accepted combinations are: - - - function - - string function name - - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - - dict of axis labels -> functions, function names or list of such. - {axis} - *args - Positional arguments to pass to `func`. - **kwargs - Keyword arguments to pass to `func`. - - Returns - ------- - scalar, Series or DataFrame - - The return can be: - - * scalar : when Series.agg is called with single function - * Series : when DataFrame.agg is called with a single function - * DataFrame : when DataFrame.agg is called with several functions - - Return scalar, Series or DataFrame. - {see_also} - Notes - ----- - `agg` is an alias for `aggregate`. Use the alias. - - In pandas, agg, as most operations just ignores the missing values, - and returns the operation only considering the values that are present. - - A passed user-defined-function will be passed a Series for evaluation. - {examples}""" - ) - # ---------------------------------------------------------------------- # Attribute access @@ -7448,77 +7406,6 @@ def clip( return result - _shared_docs[ - "groupby" - ] = """ - Group %(klass)s using a mapper or by a Series of columns. - - A groupby operation involves some combination of splitting the - object, applying a function, and combining the results. This can be - used to group large amounts of data and compute operations on these - groups. - - Parameters - ---------- - by : mapping, function, label, or list of labels - Used to determine the groups for the groupby. - If ``by`` is a function, it's called on each value of the object's - index. If a dict or Series is passed, the Series or dict VALUES - will be used to determine the groups (the Series' values are first - aligned; see ``.align()`` method). If an ndarray is passed, the - values are used as-is determine the groups. A label or list of - labels may be passed to group by the columns in ``self``. Notice - that a tuple is interpreted as a (single) key. - axis : {0 or 'index', 1 or 'columns'}, default 0 - Split along rows (0) or columns (1). - level : int, level name, or sequence of such, default None - If the axis is a MultiIndex (hierarchical), group by a particular - level or levels. - as_index : bool, default True - For aggregated output, return object with group labels as the - index. Only relevant for DataFrame input. as_index=False is - effectively "SQL-style" grouped output. - sort : bool, default True - Sort group keys. Get better performance by turning this off. - Note this does not influence the order of observations within each - group. Groupby preserves the order of rows within each group. - group_keys : bool, default True - When calling apply, add group keys to index to identify pieces. - squeeze : bool, default False - Reduce the dimensionality of the return type if possible, - otherwise return a consistent type. - - .. deprecated:: 1.1.0 - - observed : bool, default False - This only applies if any of the groupers are Categoricals. - If True: only show observed values for categorical groupers. - If False: show all values for categorical groupers. - - .. versionadded:: 0.23.0 - dropna : bool, default True - If True, and if group keys contain NA values, NA values together - with row/column will be dropped. - If False, NA values will also be treated as the key in groups - - .. versionadded:: 1.1.0 - - Returns - ------- - %(klass)sGroupBy - Returns a groupby object that contains information about the groups. - - See Also - -------- - resample : Convenience method for frequency conversion and resampling - of time series. - - Notes - ----- - See the `user guide - `_ for more. - """ - def asfreq( self: FrameOrSeries, freq, @@ -8427,35 +8314,6 @@ def ranker(data): return ranker(data) - _shared_docs[ - "compare" - ] = """ - Compare to another %(klass)s and show the differences. - - .. versionadded:: 1.1.0 - - Parameters - ---------- - other : %(klass)s - Object to compare with. - - align_axis : {0 or 'index', 1 or 'columns'}, default 1 - Determine which axis to align the comparison on. - - * 0, or 'index' : Resulting differences are stacked vertically - with rows drawn alternately from self and other. - * 1, or 'columns' : Resulting differences are aligned horizontally - with columns drawn alternately from self and other. - - keep_shape : bool, default False - If true, all rows and columns are kept. - Otherwise, only the ones with different values are kept. - - keep_equal : bool, default False - If true, the result keeps values that are equal. - Otherwise, equal values are shown as NaNs. - """ - @Appender(_shared_docs["compare"] % _shared_doc_kwargs) def compare( self, @@ -10585,45 +10443,21 @@ def mad(self, axis=None, skipna=None, level=None): examples=_min_examples, ) - @classmethod - def _add_series_or_dataframe_operations(cls): - """ - Add the series or dataframe only operations to the cls; evaluate - the doc strings again. - """ - from pandas.core.window import ( - Expanding, - ExponentialMovingWindow, - Rolling, - Window, - ) - - @doc(Rolling) - def rolling( - self, - window, - min_periods=None, - center=False, - win_type=None, - on=None, - axis=0, - closed=None, - ): - axis = self._get_axis_number(axis) - - if win_type is not None: - return Window( - self, - window=window, - min_periods=min_periods, - center=center, - win_type=win_type, - on=on, - axis=axis, - closed=closed, - ) + @doc(Rolling) + def rolling( + self, + window: "Union[int, timedelta, BaseOffset, BaseIndexer]", + min_periods: Optional[int] = None, + center: bool_t = False, + win_type: Optional[str] = None, + on: Optional[str] = None, + axis: Axis = 0, + closed: Optional[str] = None, + ): + axis = self._get_axis_number(axis) - return Rolling( + if win_type is not None: + return Window( self, window=window, min_periods=min_periods, @@ -10634,53 +10468,59 @@ def rolling( closed=closed, ) - cls.rolling = rolling - - @doc(Expanding) - def expanding(self, min_periods=1, center=None, axis=0): - axis = self._get_axis_number(axis) - if center is not None: - warnings.warn( - "The `center` argument on `expanding` " - "will be removed in the future", - FutureWarning, - stacklevel=2, - ) - else: - center = False + return Rolling( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + ) - return Expanding(self, min_periods=min_periods, center=center, axis=axis) + @doc(Expanding) + def expanding( + self, min_periods: int = 1, center: Optional[bool_t] = None, axis: Axis = 0 + ) -> Expanding: + axis = self._get_axis_number(axis) + if center is not None: + warnings.warn( + "The `center` argument on `expanding` " "will be removed in the future", + FutureWarning, + stacklevel=2, + ) + else: + center = False - cls.expanding = expanding + return Expanding(self, min_periods=min_periods, center=center, axis=axis) - @doc(ExponentialMovingWindow) - def ewm( + @doc(ExponentialMovingWindow) + def ewm( + self, + com: Optional[float] = None, + span: Optional[float] = None, + halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None, + alpha: Optional[float] = None, + min_periods: int = 0, + adjust: bool_t = True, + ignore_na: bool_t = False, + axis: Axis = 0, + times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, + ) -> ExponentialMovingWindow: + axis = self._get_axis_number(axis) + return ExponentialMovingWindow( self, - com=None, - span=None, - halflife=None, - alpha=None, - min_periods=0, - adjust=True, - ignore_na=False, - axis=0, - times=None, - ): - axis = self._get_axis_number(axis) - return ExponentialMovingWindow( - self, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na, - axis=axis, - times=times, - ) - - cls.ewm = ewm + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na, + axis=axis, + times=times, + ) @doc(klass=_shared_doc_kwargs["klass"], axis="") def transform(self, func, *args, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index 555024ad75f5e..a852529e9b517 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5000,7 +5000,6 @@ def to_period(self, freq=None, copy=True) -> "Series": Series._add_numeric_operations() -Series._add_series_or_dataframe_operations() # Add arithmetic! ops.add_flex_arithmetic_methods(Series) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index b81942f062b19..a61977cb089e1 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -2,6 +2,147 @@ _shared_docs: Dict[str, str] = dict() +_shared_docs[ + "aggregate" +] = """ + Aggregate using one or more operations over the specified axis. + {versionadded} + Parameters + ---------- + func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + {axis} + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + scalar, Series or DataFrame + + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + Return scalar, Series or DataFrame. + {see_also} + Notes + ----- + `agg` is an alias for `aggregate`. Use the alias. + + A passed user-defined-function will be passed a Series for evaluation. + {examples}""" + +_shared_docs[ + "compare" +] = """ + Compare to another %(klass)s and show the differences. + + .. versionadded:: 1.1.0 + + Parameters + ---------- + other : %(klass)s + Object to compare with. + + align_axis : {0 or 'index', 1 or 'columns'}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + + keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + + keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. + """ + +_shared_docs[ + "groupby" +] = """ + Group %(klass)s using a mapper or by a Series of columns. + + A groupby operation involves some combination of splitting the + object, applying a function, and combining the results. This can be + used to group large amounts of data and compute operations on these + groups. + + Parameters + ---------- + by : mapping, function, label, or list of labels + Used to determine the groups for the groupby. + If ``by`` is a function, it's called on each value of the object's + index. If a dict or Series is passed, the Series or dict VALUES + will be used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If an ndarray is passed, the + values are used as-is determine the groups. A label or list of + labels may be passed to group by the columns in ``self``. Notice + that a tuple is interpreted as a (single) key. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Split along rows (0) or columns (1). + level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels. + as_index : bool, default True + For aggregated output, return object with group labels as the + index. Only relevant for DataFrame input. as_index=False is + effectively "SQL-style" grouped output. + sort : bool, default True + Sort group keys. Get better performance by turning this off. + Note this does not influence the order of observations within each + group. Groupby preserves the order of rows within each group. + group_keys : bool, default True + When calling apply, add group keys to index to identify pieces. + squeeze : bool, default False + Reduce the dimensionality of the return type if possible, + otherwise return a consistent type. + + .. deprecated:: 1.1.0 + + observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionadded:: 0.23.0 + dropna : bool, default True + If True, and if group keys contain NA values, NA values together + with row/column will be dropped. + If False, NA values will also be treated as the key in groups + + .. versionadded:: 1.1.0 + + Returns + ------- + %(klass)sGroupBy + Returns a groupby object that contains information about the groups. + + See Also + -------- + resample : Convenience method for frequency conversion and resampling + of time series. + + Notes + ----- + See the `user guide + `_ for more. + """ _shared_docs[ "melt" diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 51a067427e867..2f3058db4493b 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -7,9 +7,9 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin from pandas.core.indexes.api import MultiIndex +from pandas.core.shared_docs import _shared_docs _shared_docs = dict(**_shared_docs) _doc_template = """ From 92585901bc2adf8e6e14fa39d948ec6b028d35f1 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 28 Aug 2020 18:02:11 +0100 Subject: [PATCH 2/4] TYP: drop NDFrame._add_series_or_dataframe_operations --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d6e868d48c835..fb2fd90ff6c05 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5127,7 +5127,7 @@ def pipe(self, func, *args, **kwargs): ... .pipe(g, arg1=a) ... .pipe((func, 'arg2'), arg1=a, arg3=c) ... ) # doctest: +SKIP - """ + """ return com.pipe(self, func, *args, **kwargs) # ---------------------------------------------------------------------- From fdf1ce761a729cccb53fbc9bb2e1f62f4850ea88 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 28 Aug 2020 18:52:41 +0100 Subject: [PATCH 3/4] fix errors --- pandas/core/generic.py | 2 +- pandas/core/window/rolling.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fb2fd90ff6c05..6936570182356 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10486,7 +10486,7 @@ def expanding( axis = self._get_axis_number(axis) if center is not None: warnings.warn( - "The `center` argument on `expanding` " "will be removed in the future", + "The `center` argument on `expanding` will be removed in the future", FutureWarning, stacklevel=2, ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index baabdf0fca29a..f5e3587ed02d5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -22,7 +22,7 @@ from pandas._libs.tslibs import BaseOffset, to_offset import pandas._libs.window.aggregations as window_aggregations -from pandas._typing import ArrayLike, Axis, FrameOrSeriesUnion, Label +from pandas._typing import ArrayLike, Axis, FrameOrSeries, FrameOrSeriesUnion, Label from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly, doc @@ -159,7 +159,7 @@ class _Window(PandasObject, ShallowMixin, SelectionMixin): def __init__( self, - obj: FrameOrSeriesUnion, + obj: FrameOrSeries, window=None, min_periods: Optional[int] = None, center: bool = False, From dfa9bb1efec0f0e633276e91186dc8e17a55e7ac Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 28 Aug 2020 19:36:07 +0100 Subject: [PATCH 4/4] fix more errors --- pandas/core/generic.py | 3 +- pandas/core/shared_docs.py | 476 ++++++++++++++++++------------------- 2 files changed, 240 insertions(+), 239 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6936570182356..8bdf0861175b2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -107,10 +107,11 @@ from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: + from pandas._libs.tslibs import BaseOffset + from pandas.core.resample import Resampler from pandas.core.series import Series # noqa: F401 from pandas.core.window.indexers import BaseIndexer - from pandas._libs.tslibs import BaseOffset # goal is to be able to define the docs close to function, while still being # able to share diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index a61977cb089e1..0aaccb47efc44 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -4,256 +4,256 @@ _shared_docs[ "aggregate" -] = """ - Aggregate using one or more operations over the specified axis. - {versionadded} - Parameters - ---------- - func : function, str, list or dict - Function to use for aggregating the data. If a function, must either - work when passed a {klass} or when passed to {klass}.apply. - - Accepted combinations are: - - - function - - string function name - - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - - dict of axis labels -> functions, function names or list of such. - {axis} - *args - Positional arguments to pass to `func`. - **kwargs - Keyword arguments to pass to `func`. - - Returns - ------- - scalar, Series or DataFrame - - The return can be: - - * scalar : when Series.agg is called with single function - * Series : when DataFrame.agg is called with a single function - * DataFrame : when DataFrame.agg is called with several functions - - Return scalar, Series or DataFrame. - {see_also} - Notes - ----- - `agg` is an alias for `aggregate`. Use the alias. - - A passed user-defined-function will be passed a Series for evaluation. - {examples}""" +] = """\ +Aggregate using one or more operations over the specified axis. +{versionadded} +Parameters +---------- +func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. +{axis} +*args + Positional arguments to pass to `func`. +**kwargs + Keyword arguments to pass to `func`. + +Returns +------- +scalar, Series or DataFrame + + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + Return scalar, Series or DataFrame. +{see_also} +Notes +----- +`agg` is an alias for `aggregate`. Use the alias. + +A passed user-defined-function will be passed a Series for evaluation. +{examples}""" _shared_docs[ "compare" -] = """ - Compare to another %(klass)s and show the differences. +] = """\ +Compare to another %(klass)s and show the differences. - .. versionadded:: 1.1.0 +.. versionadded:: 1.1.0 - Parameters - ---------- - other : %(klass)s - Object to compare with. +Parameters +---------- +other : %(klass)s + Object to compare with. - align_axis : {0 or 'index', 1 or 'columns'}, default 1 - Determine which axis to align the comparison on. +align_axis : {0 or 'index', 1 or 'columns'}, default 1 + Determine which axis to align the comparison on. - * 0, or 'index' : Resulting differences are stacked vertically - with rows drawn alternately from self and other. - * 1, or 'columns' : Resulting differences are aligned horizontally - with columns drawn alternately from self and other. + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. - keep_shape : bool, default False - If true, all rows and columns are kept. - Otherwise, only the ones with different values are kept. +keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. - keep_equal : bool, default False - If true, the result keeps values that are equal. - Otherwise, equal values are shown as NaNs. - """ +keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. +""" _shared_docs[ "groupby" -] = """ - Group %(klass)s using a mapper or by a Series of columns. - - A groupby operation involves some combination of splitting the - object, applying a function, and combining the results. This can be - used to group large amounts of data and compute operations on these - groups. - - Parameters - ---------- - by : mapping, function, label, or list of labels - Used to determine the groups for the groupby. - If ``by`` is a function, it's called on each value of the object's - index. If a dict or Series is passed, the Series or dict VALUES - will be used to determine the groups (the Series' values are first - aligned; see ``.align()`` method). If an ndarray is passed, the - values are used as-is determine the groups. A label or list of - labels may be passed to group by the columns in ``self``. Notice - that a tuple is interpreted as a (single) key. - axis : {0 or 'index', 1 or 'columns'}, default 0 - Split along rows (0) or columns (1). - level : int, level name, or sequence of such, default None - If the axis is a MultiIndex (hierarchical), group by a particular - level or levels. - as_index : bool, default True - For aggregated output, return object with group labels as the - index. Only relevant for DataFrame input. as_index=False is - effectively "SQL-style" grouped output. - sort : bool, default True - Sort group keys. Get better performance by turning this off. - Note this does not influence the order of observations within each - group. Groupby preserves the order of rows within each group. - group_keys : bool, default True - When calling apply, add group keys to index to identify pieces. - squeeze : bool, default False - Reduce the dimensionality of the return type if possible, - otherwise return a consistent type. - - .. deprecated:: 1.1.0 - - observed : bool, default False - This only applies if any of the groupers are Categoricals. - If True: only show observed values for categorical groupers. - If False: show all values for categorical groupers. - - .. versionadded:: 0.23.0 - dropna : bool, default True - If True, and if group keys contain NA values, NA values together - with row/column will be dropped. - If False, NA values will also be treated as the key in groups - - .. versionadded:: 1.1.0 - - Returns - ------- - %(klass)sGroupBy - Returns a groupby object that contains information about the groups. - - See Also - -------- - resample : Convenience method for frequency conversion and resampling - of time series. - - Notes - ----- - See the `user guide - `_ for more. - """ +] = """\ +Group %(klass)s using a mapper or by a Series of columns. + +A groupby operation involves some combination of splitting the +object, applying a function, and combining the results. This can be +used to group large amounts of data and compute operations on these +groups. + +Parameters +---------- +by : mapping, function, label, or list of labels + Used to determine the groups for the groupby. + If ``by`` is a function, it's called on each value of the object's + index. If a dict or Series is passed, the Series or dict VALUES + will be used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If an ndarray is passed, the + values are used as-is determine the groups. A label or list of + labels may be passed to group by the columns in ``self``. Notice + that a tuple is interpreted as a (single) key. +axis : {0 or 'index', 1 or 'columns'}, default 0 + Split along rows (0) or columns (1). +level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels. +as_index : bool, default True + For aggregated output, return object with group labels as the + index. Only relevant for DataFrame input. as_index=False is + effectively "SQL-style" grouped output. +sort : bool, default True + Sort group keys. Get better performance by turning this off. + Note this does not influence the order of observations within each + group. Groupby preserves the order of rows within each group. +group_keys : bool, default True + When calling apply, add group keys to index to identify pieces. +squeeze : bool, default False + Reduce the dimensionality of the return type if possible, + otherwise return a consistent type. + + .. deprecated:: 1.1.0 + +observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionadded:: 0.23.0 +dropna : bool, default True + If True, and if group keys contain NA values, NA values together + with row/column will be dropped. + If False, NA values will also be treated as the key in groups + + .. versionadded:: 1.1.0 + +Returns +------- +%(klass)sGroupBy + Returns a groupby object that contains information about the groups. + +See Also +-------- +resample : Convenience method for frequency conversion and resampling + of time series. + +Notes +----- +See the `user guide +`_ for more. +""" _shared_docs[ "melt" -] = """ - Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. - - This function is useful to massage a DataFrame into a format where one - or more columns are identifier variables (`id_vars`), while all other - columns, considered measured variables (`value_vars`), are "unpivoted" to - the row axis, leaving just two non-identifier columns, 'variable' and - 'value'. - %(versionadded)s - Parameters - ---------- - id_vars : tuple, list, or ndarray, optional - Column(s) to use as identifier variables. - value_vars : tuple, list, or ndarray, optional - Column(s) to unpivot. If not specified, uses all columns that - are not set as `id_vars`. - var_name : scalar - Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. - value_name : scalar, default 'value' - Name to use for the 'value' column. - col_level : int or str, optional - If columns are a MultiIndex then use this level to melt. - ignore_index : bool, default True - If True, original index is ignored. If False, the original index is retained. - Index labels will be repeated as necessary. - - .. versionadded:: 1.1.0 - - Returns - ------- - DataFrame - Unpivoted DataFrame. - - See Also - -------- - %(other)s : Identical method. - pivot_table : Create a spreadsheet-style pivot table as a DataFrame. - DataFrame.pivot : Return reshaped DataFrame organized - by given index / column values. - DataFrame.explode : Explode a DataFrame from list-like - columns to long format. - - Examples - -------- - >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, - ... 'B': {0: 1, 1: 3, 2: 5}, - ... 'C': {0: 2, 1: 4, 2: 6}}) - >>> df - A B C - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> %(caller)sid_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 3 a C 2 - 4 b C 4 - 5 c C 6 - - The names of 'variable' and 'value' columns can be customized: - - >>> %(caller)sid_vars=['A'], value_vars=['B'], - ... var_name='myVarname', value_name='myValname') - A myVarname myValname - 0 a B 1 - 1 b B 3 - 2 c B 5 - - Original index values can be kept around: - - >>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 0 a C 2 - 1 b C 4 - 2 c C 6 - - If you have multi-index columns: - - >>> df.columns = [list('ABC'), list('DEF')] - >>> df - A B C - D E F - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) - (A, D) variable_0 variable_1 value - 0 a B E 1 - 1 b B E 3 - 2 c B E 5 - """ +] = """\ +Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. + +This function is useful to massage a DataFrame into a format where one +or more columns are identifier variables (`id_vars`), while all other +columns, considered measured variables (`value_vars`), are "unpivoted" to +the row axis, leaving just two non-identifier columns, 'variable' and +'value'. +%(versionadded)s +Parameters +---------- +id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. +value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. +var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. +value_name : scalar, default 'value' + Name to use for the 'value' column. +col_level : int or str, optional + If columns are a MultiIndex then use this level to melt. +ignore_index : bool, default True + If True, original index is ignored. If False, the original index is retained. + Index labels will be repeated as necessary. + + .. versionadded:: 1.1.0 + +Returns +------- +DataFrame + Unpivoted DataFrame. + +See Also +-------- +%(other)s : Identical method. +pivot_table : Create a spreadsheet-style pivot table as a DataFrame. +DataFrame.pivot : Return reshaped DataFrame organized + by given index / column values. +DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + +Examples +-------- +>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, +... 'B': {0: 1, 1: 3, 2: 5}, +... 'C': {0: 2, 1: 4, 2: 6}}) +>>> df + A B C +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +3 a C 2 +4 b C 4 +5 c C 6 + +The names of 'variable' and 'value' columns can be customized: + +>>> %(caller)sid_vars=['A'], value_vars=['B'], +... var_name='myVarname', value_name='myValname') + A myVarname myValname +0 a B 1 +1 b B 3 +2 c B 5 + +Original index values can be kept around: + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +0 a C 2 +1 b C 4 +2 c C 6 + +If you have multi-index columns: + +>>> df.columns = [list('ABC'), list('DEF')] +>>> df + A B C + D E F +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value +0 a B E 1 +1 b B E 3 +2 c B E 5 +"""