diff --git a/doc/source/options.rst b/doc/source/options.rst index 5edd28e559bc1..79145dfbf2939 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -86,7 +86,7 @@ pandas namespace. To change an option, call ``set_option('option regex', new_va pd.set_option('mode.sim_interactive', True) pd.get_option('mode.sim_interactive') -**Note:** that the option 'mode.sim_interactive' is mostly used for debugging purposes. +**Note:** that the option 'mode.sim_interactive' is mostly used for debugging purposes. All options also have a default value, and you can use ``reset_option`` to do just that: @@ -213,7 +213,8 @@ will be given. ``display.max_info_rows``: ``df.info()`` will usually show null-counts for each column. For large frames this can be quite slow. ``max_info_rows`` and ``max_info_cols`` -limit this null check only to frames with smaller dimensions then specified. +limit this null check only to frames with smaller dimensions than specified. Note that you +can specify the option ``df.info(null_counts=True)`` to override this when showing a particular frame. .. ipython:: python diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index 7d01cb997b611..dee83f8bb75ea 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -159,6 +159,7 @@ Enhancements - Added support for 3-character ISO and non-standard country codes in :func:``io.wb.download()`` (:issue:`8482`) - :ref:`World Bank data requests ` now will warn/raise based on an ``errors`` argument, as well as a list of hard-coded country codes and the World Bank's JSON response. In prior versions, the error messages didn't look at the World Bank's JSON response. Problem-inducing input were simply dropped prior to the request. The issue was that many good countries were cropped in the hard-coded approach. All countries will work now, but some bad countries will raise exceptions because some edge cases break the entire response. 
(:issue:`8482`) - Added option to ``Series.str.split()`` to return a ``DataFrame`` rather than a ``Series`` (:issue:`8428`) +- Added option to ``df.info(null_counts=None|True|False)`` to override the default display options and force showing of the null-counts (:issue:`8701`) .. _whatsnew_0151.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 29aad379c8424..4350d5aba3846 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1424,7 +1424,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, if buf is None: return formatter.buf.getvalue() - def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None): + def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None): """ Concise summary of a DataFrame. @@ -1444,6 +1444,12 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None): the `display.memory_usage` setting. True or False overrides the `display.memory_usage` setting. Memory usage is shown in human-readable units (base-2 representation). + null_counts : boolean, default None + Whether to show the non-null counts + If None, then only show if the frame is smaller than max_info_rows and max_info_columns. + If True, always show counts. + If False, never show counts. 
+ """ from pandas.core.format import _put_lines @@ -1469,8 +1475,11 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None): max_rows = get_option('display.max_info_rows', len(self) + 1) - show_counts = ((len(self.columns) <= max_cols) and - (len(self) < max_rows)) + if null_counts is None: + show_counts = ((len(self.columns) <= max_cols) and + (len(self) < max_rows)) + else: + show_counts = null_counts exceeds_info_cols = len(self.columns) > max_cols def _verbose_repr(): diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 89d08d37e0a30..47f9762eb0fa3 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -117,6 +117,26 @@ def test_eng_float_formatter(self): repr(self.frame) self.reset_display_options() + def test_show_null_counts(self): + + df = DataFrame(1,columns=range(10),index=range(10)) + df.iloc[1,1] = np.nan + + def check(null_counts, result): + buf = StringIO() + r = df.info(buf=buf,null_counts=null_counts) + self.assertTrue(('non-null' in buf.getvalue()) is result) + + with option_context('display.max_info_rows',20,'display.max_info_columns',20): + check(None, True) + check(True, True) + check(False, False) + + with option_context('display.max_info_rows',5,'display.max_info_columns',5): + check(None, False) + check(True, False) + check(False, False) + def test_repr_tuples(self): buf = StringIO()