pandas-dev · jorisvandenbossche · Jun 26, 2018 · Jun 5, 2018 · Jun 14, 2018 · Jun 14, 2018
diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
@@ -10,6 +10,36 @@ and bug fixes. We recommend that all users upgrade to this version.
     :local:
     :backlinks: none
 
+.. _whatsnew_0232.enhancements:
+
+Logical Reductions over Entire DataFrame
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:meth:`DataFrame.all` and :meth:`DataFrame.any` now accept ``axis=None`` to reduce over all axes to a scalar (:issue:`19976`)
+
+.. ipython:: python
+
+   df = pd.DataFrame({"A": [1, 2], "B": [True, False]})
+   df.all(axis=None)
+
+
+This also provides compatibility with NumPy 1.15, which now dispatches to ``DataFrame.all``.
+With NumPy 1.15 and pandas 0.23.1 or earlier, :func:`numpy.all` will not reduce over every axis:
+
+.. code-block:: python
+
+   >>> # NumPy 1.15, pandas 0.23.1
+   >>> np.any(pd.DataFrame({"A": [False], "B": [False]}))
+   A    False
+   B    False
+   dtype: bool
+
+With pandas 0.23.2, that will correctly return False.
+
+.. ipython:: python
+
+   np.any(pd.DataFrame({"A": [False], "B": [False]}))
+
 
 .. _whatsnew_0232.fixed_regressions:
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6844,13 +6844,18 @@ def _count_level(self, level, axis=0, numeric_only=False):
 
     def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                 filter_type=None, **kwds):
-        axis = self._get_axis_number(axis)
+        if axis is None and filter_type == 'bool':
+            labels = None
+            constructor = None
+        else:
+            # TODO: Make other agg func handle axis=None properly
+            axis = self._get_axis_number(axis)
+            labels = self._get_agg_axis(axis)
+            constructor = self._constructor
 
         def f(x):
             return op(x, axis=axis, skipna=skipna, **kwds)
 
-        labels = self._get_agg_axis(axis)
-
         # exclude timedelta/datetime unless we are uniform types
         if axis == 1 and self._is_mixed_type and self._is_datelike_mixed_type:
             numeric_only = True
@@ -6859,6 +6864,13 @@ def f(x):
             try:
                 values = self.values
                 result = f(values)
+
+                if (filter_type == 'bool' and values.dtype.kind == 'O' and
+                        axis is None):
+                    # work around https://github.com/numpy/numpy/issues/10489
+                    # TODO: combine with hasattr(result, 'dtype') further down
+                    # hard since we don't have `values` down there.
+                    result = np.bool_(result)
             except Exception as e:
 
                 # try by-column first
@@ -6925,7 +6937,9 @@ def f(x):
                 if axis == 0:
                     result = coerce_to_dtypes(result, self.dtypes)
 
-        return Series(result, index=labels)
+        if constructor is not None:
+            result = Series(result, index=labels)
+        return result
 
     def nunique(self, axis=0, dropna=True):
         """

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -8728,6 +8728,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
         return rs
 
     def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
+        if axis is None:
+            raise ValueError("Must specify 'axis' when aggregating by level.")
         grouped = self.groupby(level=level, axis=axis, sort=False)
         if hasattr(grouped, name) and skipna:
             return getattr(grouped, name)(**kwargs)
@@ -9055,8 +9057,16 @@ def _doc_parms(cls):
 
 Parameters
 ----------
-axis : int, default 0
-    Select the axis which can be 0 for indices and 1 for columns.
+axis : {None, 0 or 'index', 1 or 'columns'}, default None
+    Indicate which axis should be reduced. By default all axes are reduced
+    and a scalar is returned.
+
+    * None : reduce all axes, return a scalar.
+    * 0 / 'index' : reduce the index, return a Series whose index is the
+      original column labels.
+    * 1 / 'columns' : reduce the columns, return a Series whose index is the
+      original index.
+
 skipna : boolean, default True
     Exclude NA/null values. If an entire row/column is NA, the result
     will be NA.
@@ -9078,9 +9088,9 @@ def _doc_parms(cls):
 %(examples)s"""
 
 _all_doc = """\
-Return whether all elements are True over series or dataframe axis.
+Return whether all elements are True, potentially over an axis.
 
-Returns True if all elements within a series or along a dataframe
+Returns True if all elements within a series or along a Dataframe
 axis are non-zero, not-empty or not-False."""
 
 _all_examples = """\
@@ -9093,7 +9103,7 @@ def _doc_parms(cls):
 >>> pd.Series([True, False]).all()
 False
 
-Dataframes
+DataFrames
 
 Create a dataframe from a dictionary.
 
@@ -9110,12 +9120,17 @@ def _doc_parms(cls):
 col2    False
 dtype: bool
 
-Adding axis=1 argument will check if row-wise values all return True.
+Specify ``axis=1`` to check if row-wise values all return True.
 
 >>> df.all(axis=1)
 0     True
 1    False
 dtype: bool
+
+Or ``axis=None`` for whether every value is True.
+
+>>> df.all(axis=None)
+False
 """
 
 _all_see_also = """\
@@ -9481,6 +9496,11 @@ def _doc_parms(cls):
 1    False
 dtype: bool
 
+Aggregating over the entire DataFrame with ``axis=None``.
+
+>>> df.any(axis=None)
+True
+
 `any` for an empty DataFrame is an empty Series.
 
 >>> pd.DataFrame([]).any()
@@ -9651,22 +9671,17 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f,
     @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
                   axis_descr=axis_descr, examples=examples, see_also=see_also)
     @Appender(_bool_doc)
-    def logical_func(self, axis=None, bool_only=None, skipna=None, level=None,
+    def logical_func(self, axis=0, bool_only=None, skipna=True, level=None,
                      **kwargs):
         nv.validate_logical_func(tuple(), kwargs, fname=name)
-        if skipna is None:
-            skipna = True
-        if axis is None:
-            axis = self._stat_axis_number
         if level is not None:
             if bool_only is not None:
                 raise NotImplementedError("Option bool_only is not "
                                           "implemented with option level.")
             return self._agg_by_level(name, axis=axis, level=level,
                                       skipna=skipna)
-        return self._reduce(f, axis=axis, skipna=skipna,
-                            numeric_only=bool_only, filter_type='bool',
-                            name=name)
+        return self._reduce(f, name, axis=axis, skipna=skipna,
+                            numeric_only=bool_only, filter_type='bool')
 
     return set_function_name(logical_func, name, cls)
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3238,6 +3238,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
 
         """
         delegate = self._values
+        if axis is None:
+            axis = self._stat_axis_number
         if isinstance(delegate, np.ndarray):
             # Validate that 'axis' is consistent with Series's single axis.
             self._get_axis_number(axis)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -1159,11 +1159,34 @@ def test_any_all(self):
         self._check_bool_op('any', np.any, has_skipna=True, has_bool_only=True)
         self._check_bool_op('all', np.all, has_skipna=True, has_bool_only=True)
 
-        df = DataFrame(randn(10, 4)) > 0
-        df.any(1)
-        df.all(1)
-        df.any(1, bool_only=True)
-        df.all(1, bool_only=True)
+        df = DataFrame({
+            'A': [True, False, False],
+            'B': [True, True, False],
+            'C': [True, True, True],
+        }, index=['a', 'b', 'c'])
+        result = df[['A', 'B']].any(1)
+        expected = Series([True, True, False], index=['a', 'b', 'c'])
+        tm.assert_series_equal(result, expected)
+
+        result = df[['A', 'B']].any(1, bool_only=True)
+        tm.assert_series_equal(result, expected)
+
+        result = df.all(1)
+        expected = Series([True, False, False], index=['a', 'b', 'c'])
+        tm.assert_series_equal(result, expected)
+
+        result = df.all(1, bool_only=True)
+        tm.assert_series_equal(result, expected)
+
+        # Axis is None
+        result = df.all(axis=None).item()
+        assert result is False
+
+        result = df.any(axis=None).item()
+        assert result is True
+
+        result = df[['C']].all(axis=None).item()
+        assert result is True
 
         # skip pathological failure cases
         # class CantNonzero(object):
@@ -1185,6 +1208,78 @@ def test_any_all(self):
         # df.any(1, bool_only=True)
         # df.all(1, bool_only=True)
 
+    @pytest.mark.parametrize('func, data, expected', [
+        (np.any, {}, False),
+        (np.all, {}, True),
+        (np.any, {'A': []}, False),
+        (np.all, {'A': []}, True),
+        (np.any, {'A': [False, False]}, False),
+        (np.all, {'A': [False, False]}, False),
+        (np.any, {'A': [True, False]}, True),
+        (np.all, {'A': [True, False]}, False),
+        (np.any, {'A': [True, True]}, True),
+        (np.all, {'A': [True, True]}, True),
+
+        (np.any, {'A': [False], 'B': [False]}, False),
+        (np.all, {'A': [False], 'B': [False]}, False),
+
+        (np.any, {'A': [False, False], 'B': [False, True]}, True),
+        (np.all, {'A': [False, False], 'B': [False, True]}, False),
+
+        # other types
+        (np.all, {'A': pd.Series([0.0, 1.0], dtype='float')}, False),
+        (np.any, {'A': pd.Series([0.0, 1.0], dtype='float')}, True),
+        (np.all, {'A': pd.Series([0, 1], dtype=int)}, False),
+        (np.any, {'A': pd.Series([0, 1], dtype=int)}, True),
+        (np.all, {'A': pd.Series([0, 1], dtype='M8[ns]')}, False),
+        (np.any, {'A': pd.Series([0, 1], dtype='M8[ns]')}, True),
+        (np.all, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True),
+        (np.any, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True),
+        (np.all, {'A': pd.Series([0, 1], dtype='m8[ns]')}, False),
+        (np.any, {'A': pd.Series([0, 1], dtype='m8[ns]')}, True),
+        (np.all, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True),
+        (np.any, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True),
+        (np.all, {'A': pd.Series([0, 1], dtype='category')}, False),
+        (np.any, {'A': pd.Series([0, 1], dtype='category')}, True),
+        (np.all, {'A': pd.Series([1, 2], dtype='category')}, True),
+        (np.any, {'A': pd.Series([1, 2], dtype='category')}, True),
+
+        # # Mix
+        # GH-21484
+        # (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'),
+        #           'B': pd.Series([10, 20], dtype='m8[ns]')}, True),
+    ])
+    def test_any_all_np_func(self, func, data, expected):
+        # https://github.com/pandas-dev/pandas/issues/19976
+        data = DataFrame(data)
+        result = func(data)
+        assert isinstance(result, np.bool_)
+        assert result.item() is expected
+
+        # method version
+        result = getattr(DataFrame(data), func.__name__)(axis=None)
+        assert isinstance(result, np.bool_)
+        assert result.item() is expected
+
+    def test_any_all_object(self):
+        # https://github.com/pandas-dev/pandas/issues/19976
+        result = np.all(DataFrame(columns=['a', 'b'])).item()
+        assert result is True
+
+        result = np.any(DataFrame(columns=['a', 'b'])).item()
+        assert result is False
+
+    @pytest.mark.parametrize('method', ['any', 'all'])
+    def test_any_all_level_axis_none_raises(self, method):
+        df = DataFrame(
+            {"A": 1},
+            index=MultiIndex.from_product([['A', 'B'], ['a', 'b']],
+                                          names=['out', 'in'])
+        )
+        xpr = "Must specify 'axis' when aggregating by level."
+        with tm.assert_raises_regex(ValueError, xpr):
+            getattr(df, method)(axis=None, level='out')
+
     def _check_bool_op(self, name, alternative, frame=None, has_skipna=True,
                        has_bool_only=False):
         if frame is None: