Add a display.max_info_rows option and deprecate DataFrame._verbose_info.

Frames with more rows than display.max_info_rows (default 1000000) skip the
per-column null check in the info-style repr; None means always check.

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 114210d75959b..44edc0e4e9c4a 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -1,5 +1,6 @@
 import pandas.core.config as cf
-from pandas.core.config import is_int, is_bool, is_text, is_float
+from pandas.core.config import (is_int, is_bool, is_text, is_float,
+                                is_instance_factory)
 from pandas.core.format import detect_console_encoding
 
 """
@@ -123,10 +124,24 @@
     will be displayed as exactly 0 by repr and friends.
 """
 
+pc_max_info_rows_doc = """
+: int or None
+    max_info_rows is the maximum number of rows for which a frame will
+    perform a null check on its columns when repr'ing to a console.
+    The default is 1,000,000 rows. So, if a DataFrame has more than
+    1,000,000 rows there will be no null check performed on the
+    columns and thus the representation will take much less time to
+    display in an interactive session. A value of None means always
+    perform a null check when repr'ing.
+"""
+
+
 with cf.config_prefix('display'):
     cf.register_option('precision', 7, pc_precision_doc, validator=is_int)
     cf.register_option('float_format', None, float_format_doc)
     cf.register_option('column_space', 12, validator=is_int)
+    cf.register_option('max_info_rows', 1000000, pc_max_info_rows_doc,
+                       validator=is_instance_factory((int, type(None))))
     cf.register_option('max_rows', 100, pc_max_rows_doc, validator=is_int)
     cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int)
     cf.register_option('max_columns', 20, pc_max_cols_doc, validator=is_int)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f4a077c61e9cc..ea79236989b96 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -51,7 +51,7 @@
 import pandas.tslib as tslib
 import pandas.algos as _algos
 
-from pandas.core.config import get_option
+from pandas.core.config import get_option, set_option
 
 
 #----------------------------------------------------------------------
@@ -331,7 +331,6 @@ def f(self, other):
 
 class DataFrame(NDFrame):
     _auto_consolidate = True
-    _verbose_info = True
     _het_axis = 1
     _info_axis = 'columns'
     _col_klass = Series
@@ -560,6 +559,28 @@ def _wrap_array(self, arr, axes, copy=False):
         index, columns = axes
         return self._constructor(arr, index=index, columns=columns, copy=copy)
 
+    @property
+    def _verbose_info(self):
+        """Deprecated alias: True when display.max_info_rows is None."""
+        import warnings
+        # stacklevel=2 attributes the warning to the code touching the
+        # property rather than to this accessor
+        warnings.warn('The _verbose_info property will be removed in version '
+                      '0.12', FutureWarning, stacklevel=2)
+        return get_option('display.max_info_rows') is None
+
+    @_verbose_info.setter
+    def _verbose_info(self, value):
+        """Deprecated alias: a truthy value sets display.max_info_rows to
+        None, a falsy value sets it to 1000000."""
+        import warnings
+        warnings.warn('The _verbose_info property will be removed in version '
+                      '0.12', FutureWarning, stacklevel=2)
+
+        # NB: this writes the shared display option, not per-frame state
+        value = None if value else 1000000
+        set_option('display.max_info_rows', value)
+
     @property
     def axes(self):
         return [self.index, self.columns]
@@ -652,7 +673,9 @@ def __unicode__(self):
         """
         buf = StringIO(u"")
         if self._need_info_repr_():
-            self.info(buf=buf, verbose=self._verbose_info)
+            max_info_rows = get_option('display.max_info_rows')
+            verbose = max_info_rows is None or self.shape[0] <= max_info_rows
+            self.info(buf=buf, verbose=verbose)
         else:
             is_wide = self._need_wide_repr()
             line_width = None
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
index f142b36534e22..6e003d5a032db 100644
--- a/pandas/sparse/frame.py
+++ b/pandas/sparse/frame.py
@@ -66,7 +66,6 @@ class SparseDataFrame(DataFrame):
         Default fill_value for converting Series to SparseSeries. Will not
         override SparseSeries passed in
     """
-    _verbose_info = False
     _columns = None
     _series = None
     _is_mixed_type = False
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
index c31f4e3b8061d..1499a2fdce1fb 100644
--- a/pandas/tests/test_format.py
+++ b/pandas/tests/test_format.py
@@ -470,6 +470,47 @@ def test_frame_info_encoding(self):
         repr(df.T)
         fmt.set_printoptions(max_rows=200)
 
+    def test_large_frame_repr(self):
+        def wrap_rows_options(f):
+            # snapshot the row-related display options and always put
+            # them back, even when the wrapped call raises
+            def _f(*args, **kwargs):
+                old_max_rows = pd.get_option('display.max_rows')
+                old_max_info_rows = pd.get_option('display.max_info_rows')
+                try:
+                    return f(*args, **kwargs)
+                finally:
+                    pd.set_option('display.max_rows', old_max_rows)
+                    pd.set_option('display.max_info_rows', old_max_info_rows)
+            return _f
+
+        @wrap_rows_options
+        def test_setting(value, nrows=3, ncols=2):
+            if value is None:
+                expected_difference = 0
+            elif isinstance(value, int):
+                expected_difference = ncols
+            else:
+                raise ValueError("'value' must be int or None")
+
+            pd.set_option('display.max_rows', nrows - 1)
+            pd.set_option('display.max_info_rows', value)
+
+            smallx = DataFrame(np.random.rand(nrows, ncols))
+            repr_small = repr(smallx)
+
+            bigx = DataFrame(np.random.rand(nrows + 1, ncols))
+            repr_big = repr(bigx)
+
+            diff = len(repr_small.splitlines()) - len(repr_big.splitlines())
+
+            # the difference in line count is the number of columns
+            self.assertEqual(diff, expected_difference)
+
+        test_setting(None)
+        test_setting(3)
+        self.assertRaises(ValueError, test_setting, 'string')
+
     def test_wide_repr(self):
         with option_context('mode.sim_interactive', True):
             col = lambda l, k: [tm.rands(k) for _ in xrange(l)]