From df91bcbf657b25fd1ee7357824280d10ac7b4f9c Mon Sep 17 00:00:00 2001 From: James Myatt Date: Mon, 3 Dec 2018 14:26:10 +0000 Subject: [PATCH 01/10] DOC: Correct/update skipna docstrings for `any` and `all` (#23109) Also include examples with NA values and clarify treatment of NA with `skipna == False` --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/generic.py | 28 +++++++++++++++++++++------- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7617ad5b428a2..5317a44e86102 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -366,6 +366,7 @@ Other Enhancements - :meth:`DataFrame.to_stata` and :class:` pandas.io.stata.StataWriter117` can write mixed sting columns to Stata strl format (:issue:`23633`) - :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained the an ``axis`` parameter (:issue: `8839`) - :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`) +- :meth:`any` and :meth:`all` aggregation methods have corrected/improved docstrings for the ``skipna`` parameter (:issue:`23019`). .. 
_whatsnew_0240.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c58c84b422209..4cdbb21776062 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9862,10 +9862,10 @@ def _add_numeric_operations(cls): axis_descr, name, name2 = _doc_parms(cls) cls.any = _make_logical_function( - cls, 'any', name, name2, axis_descr, + cls, 'any', name, name2, False, axis_descr, _any_desc, nanops.nanany, _any_examples, _any_see_also) cls.all = _make_logical_function( - cls, 'all', name, name2, axis_descr, _all_doc, + cls, 'all', name, name2, True, axis_descr, _all_doc, nanops.nanall, _all_examples, _all_see_also) @Substitution(outname='mad', @@ -10188,8 +10188,10 @@ def _doc_parms(cls): Include only boolean columns. If None, will attempt to use everything, then use only boolean data. Not implemented for Series. skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be %(empty_value)s, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. level : int or level name, default None If the axis is a MultiIndex (hierarchical), count along a particular level, collapsing into a %(name1)s. 
@@ -10219,6 +10221,10 @@ def _doc_parms(cls): True >>> pd.Series([True, False]).all() False +>>> pd.Series([]).all() +True +>>> pd.Series([np.nan]).all() +True DataFrames @@ -10576,6 +10582,13 @@ def _doc_parms(cls): >>> pd.Series([True, False]).any() True +>>> pd.Series([]).any() +False +>>> pd.Series([np.nan]).any() +False +>>> pd.Series([np.nan]).any(skipna=False) +True + **DataFrame** @@ -10860,10 +10873,11 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): return set_function_name(cum_func, name, cls) -def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f, - examples, see_also): +def _make_logical_function(cls, name, name1, name2, empty_value, axis_descr, + desc, f, examples, see_also): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, - axis_descr=axis_descr, examples=examples, see_also=see_also) + empty_value=empty_value, axis_descr=axis_descr, + examples=examples, see_also=see_also) @Appender(_bool_doc) def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): From 95dd70a427cfa5239cbffb787cf53fe5aba81f8c Mon Sep 17 00:00:00 2001 From: James Myatt Date: Sat, 8 Dec 2018 16:04:41 +0000 Subject: [PATCH 02/10] Address comments --- doc/source/whatsnew/v0.24.0.rst | 1 - pandas/core/generic.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 5317a44e86102..7617ad5b428a2 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -366,7 +366,6 @@ Other Enhancements - :meth:`DataFrame.to_stata` and :class:` pandas.io.stata.StataWriter117` can write mixed sting columns to Stata strl format (:issue:`23633`) - :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained the an ``axis`` parameter (:issue: `8839`) - :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping 
intervals (:issue:`23309`) -- :meth:`any` and :meth:`all` aggregation methods have corrected/improved docstrings for the ``skipna`` parameter (:issue:`23019`). .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4cdbb21776062..2ef6d8b86a97d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9862,11 +9862,11 @@ def _add_numeric_operations(cls): axis_descr, name, name2 = _doc_parms(cls) cls.any = _make_logical_function( - cls, 'any', name, name2, False, axis_descr, - _any_desc, nanops.nanany, _any_examples, _any_see_also) + cls, 'any', name, name2, axis_descr, _any_desc, nanops.nanany, + _any_examples, _any_see_also, empty_value=False) cls.all = _make_logical_function( - cls, 'all', name, name2, True, axis_descr, _all_doc, - nanops.nanall, _all_examples, _all_see_also) + cls, 'all', name, name2, axis_descr, _all_doc, nanops.nanall, + _all_examples, _all_see_also, empty_value=True) @Substitution(outname='mad', desc="Return the mean absolute deviation of the values " @@ -10873,8 +10873,8 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): return set_function_name(cum_func, name, cls) -def _make_logical_function(cls, name, name1, name2, empty_value, axis_descr, - desc, f, examples, see_also): +def _make_logical_function(cls, name, name1, name2, axis_descr, + desc, f, examples, see_also, empty_value): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, empty_value=empty_value, axis_descr=axis_descr, examples=examples, see_also=see_also) From fa9ca72412360c2b9064fd97c1093da4c0dd065b Mon Sep 17 00:00:00 2001 From: James Myatt Date: Sun, 9 Dec 2018 14:41:29 +0000 Subject: [PATCH 03/10] DOC: Make any/all examples more consistent --- pandas/core/generic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index aa65c744145da..43bb882a93bca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10240,7 
+10240,7 @@ def _doc_parms(cls): _all_examples = """\ Examples -------- -Series +**Series** >>> pd.Series([True, True]).all() True @@ -10250,8 +10250,10 @@ def _doc_parms(cls): True >>> pd.Series([np.nan]).all() True +>>> pd.Series([np.nan]).all(skipna=False) +True -DataFrames +**DataFrames** Create a dataframe from a dictionary. @@ -10606,6 +10608,8 @@ def _doc_parms(cls): For Series input, the output is a scalar indicating whether any element is True. +>>> pd.Series([False, False]).any() +False >>> pd.Series([True, False]).any() True >>> pd.Series([]).any() @@ -10615,7 +10619,6 @@ def _doc_parms(cls): >>> pd.Series([np.nan]).any(skipna=False) True - **DataFrame** Whether each column contains at least one True element (the default). From 736717c0687b9a54d40d1d19d53171203e2dcfed Mon Sep 17 00:00:00 2001 From: James Myatt Date: Sun, 9 Dec 2018 14:46:15 +0000 Subject: [PATCH 04/10] DOC: Make any/all description more consistent and precise --- pandas/core/generic.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 43bb882a93bca..f3962af6e1d5b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9884,7 +9884,7 @@ def _add_numeric_operations(cls): axis_descr, name, name2 = _doc_parms(cls) cls.any = _make_logical_function( - cls, 'any', name, name2, axis_descr, _any_desc, nanops.nanany, + cls, 'any', name, name2, axis_descr, _any_doc, nanops.nanany, _any_examples, _any_see_also, empty_value=False) cls.all = _make_logical_function( cls, 'all', name, name2, axis_descr, _all_doc, nanops.nanall, @@ -10234,8 +10234,9 @@ def _doc_parms(cls): _all_doc = """\ Return whether all elements are True, potentially over an axis. -Returns True if all elements within a series or along a Dataframe -axis are non-zero, not-empty or not-False.""" +Returns True unless there is at least one element within a series or +along a Dataframe axis that is False or equivalent (e.g.
zero or +empty).""" _all_examples = """\ Examples @@ -10594,11 +10595,12 @@ def _doc_parms(cls): DataFrame.all : Return whether all elements are True over requested axis. """ -_any_desc = """\ -Return whether any element is True over requested axis. +_any_doc = """\ +Return whether any element is True, potentially over an axis. -Unlike :meth:`DataFrame.all`, this performs an *or* operation. If any of the -values along the specified axis is True, this will return True.""" +Returns False unless there is at least one element within a series or +along a Dataframe axis that is True or equivalent (e.g. non-zero or +non-empty).""" _any_examples = """\ Examples From 1b46cf70a79c2eeb2e231f68d99c1927b2ea5187 Mon Sep 17 00:00:00 2001 From: James Myatt Date: Sun, 9 Dec 2018 14:54:25 +0000 Subject: [PATCH 05/10] DOC: Improve returns information for any/all --- pandas/core/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f3962af6e1d5b..85978f9d4be52 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10226,7 +10226,9 @@ def _doc_parms(cls): Returns ------- -%(outname)s : %(name1)s or %(name2)s (if level specified) +%(outname)s : %(name1)s or %(name2)s + If level is specified, then, %(name2)s is returned; otherwise, %(name1)s + is returned.
%(see_also)s %(examples)s""" From e0aa82660ad21554b0ec73571d0a8470fa247ab9 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 9 Dec 2018 16:31:10 +0000 Subject: [PATCH 06/10] DOC: Remove name for output Co-Authored-By: jamesmyatt --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d4c6abd564016..579608b593a2c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10236,7 +10236,7 @@ def _doc_parms(cls): Returns ------- -%(outname)s : %(name1)s or %(name2)s +%(name1)s or %(name2)s If level is specified, then, %(name2)s is returned; otherwise, %(name1)s is returned. From 7a8b8c1fad67d7b1e1387403586209614a8afa01 Mon Sep 17 00:00:00 2001 From: James Myatt Date: Sun, 9 Dec 2018 16:54:44 +0000 Subject: [PATCH 07/10] DOC: Rename any/all summary variables to `_*_desc` --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 579608b593a2c..58a097191c2dd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9894,10 +9894,10 @@ def _add_numeric_operations(cls): axis_descr, name, name2 = _doc_parms(cls) cls.any = _make_logical_function( - cls, 'any', name, name2, axis_descr, _any_doc, nanops.nanany, + cls, 'any', name, name2, axis_descr, _any_desc, nanops.nanany, _any_examples, _any_see_also, empty_value=False) cls.all = _make_logical_function( - cls, 'all', name, name2, axis_descr, _all_doc, nanops.nanall, + cls, 'all', name, name2, axis_descr, _all_desc, nanops.nanall, _all_examples, _all_see_also, empty_value=True) @Substitution(outname='mad', @@ -10243,7 +10243,7 @@ def _doc_parms(cls): %(see_also)s %(examples)s""" -_all_doc = """\ +_all_desc = """\ Return whether all elements are True, potentially over an axis.
Returns True unless there is at least one element within a series or @@ -10607,7 +10607,7 @@ def _doc_parms(cls): DataFrame.all : Return whether all elements are True over requested axis. """ -_any_doc = """\ +_any_desc = """\ Return whether any element is True, potentially over an axis. Returns False unless there is at least one element within a series or From 69ca617311b79e4bde4ed2ff86c35f80d04e9baf Mon Sep 17 00:00:00 2001 From: James Myatt Date: Sun, 9 Dec 2018 16:55:18 +0000 Subject: [PATCH 08/10] Re-organise to minimise change from master --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 58a097191c2dd..8a0b0e73dd419 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10916,11 +10916,11 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): return set_function_name(cum_func, name, cls) -def _make_logical_function(cls, name, name1, name2, axis_descr, - desc, f, examples, see_also, empty_value): +def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f, + examples, see_also, empty_value): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, - empty_value=empty_value, axis_descr=axis_descr, - examples=examples, see_also=see_also) + axis_descr=axis_descr, examples=examples, see_also=see_also, + empty_value=empty_value) @Appender(_bool_doc) def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): From c3a1eeccd5ba9aa1a38395ff5b360581408035cb Mon Sep 17 00:00:00 2001 From: James Myatt Date: Mon, 10 Dec 2018 13:06:54 +0000 Subject: [PATCH 09/10] DOC: fix errors identified by validate_docstrings.py --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8a0b0e73dd419..ed9d5acbf3f60 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10219,10 +10219,10 @@ def _doc_parms(cls): original
index. * None : reduce all axes, return a scalar. -bool_only : boolean, default None +bool_only : bool, default None Include only boolean columns. If None, will attempt to use everything, then use only boolean data. Not implemented for Series. -skipna : boolean, default True +skipna : bool, default True Exclude NA/null values. If the entire row/column is NA and skipna is True, then the result will be %(empty_value)s, as for an empty row/column. If skipna is False, then NA are treated as True, because these are not From 32821431d56a5222be149ece45faa8f76361a5da Mon Sep 17 00:00:00 2001 From: James Myatt Date: Mon, 10 Dec 2018 14:44:32 +0000 Subject: [PATCH 10/10] Fix trailing whitespace --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ed9d5acbf3f60..9eb3eb37a01cc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10919,7 +10919,7 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f, examples, see_also, empty_value): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, - axis_descr=axis_descr, examples=examples, see_also=see_also, + axis_descr=axis_descr, examples=examples, see_also=see_also, empty_value=empty_value) @Appender(_bool_doc) def logical_func(self, axis=0, bool_only=None, skipna=True, level=None,