diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 688935c6b104d..262c439cde636 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -675,10 +675,7 @@ values NOT in the categories, similarly to how you can reindex ANY pandas index. }).set_index('B') In [11]: df3.index - Out[11]: - CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], - categories=[u'a', u'b', u'c'], - ordered=False) + Out[11]: CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category') In [12]: pd.concat([df2,df3] TypeError: categories must match existing categories when appending diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 4ccaf7a4719c9..a1e893f713390 100755 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -13,7 +13,8 @@ Highlights include: - New section on how-to-contribute to *pandas*, see :ref:`here ` - Revised "Merge, join, and concatenate" documentation, including graphical examples to make it easier to understand each operations, see :ref:`here ` - New method ``sample`` for drawing random samples from Series, DataFrames and Panels. See :ref:`here ` -- ``BusinessHour`` date-offset is now supported, see :ref:`here ` +- The default ``Index`` printing has changed to a more uniform format, see :ref:`here <whatsnew_0161.index_repr>` +- ``BusinessHour`` datetime-offset is now supported, see :ref:`here ` - Further enhancement to the ``.str`` accessor to make string operations easier, see :ref:`here ` .. contents:: What's new in v0.16.1 @@ -268,6 +269,52 @@ API changes - By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`) +.. 
_whatsnew_0161.index_repr: + +Index Representation +~~~~~~~~~~~~~~~~~~~~ + +The string representation of ``Index`` and its sub-classes has now been unified. These will show a single-line display if there are few values; a wrapped multi-line display for a lot of values (up to ``display.max_seq_items``); and a truncated display (the head and tail of the data) if there are more than ``display.max_seq_items`` items. The formatting for ``MultiIndex`` is unchanged (a multi-line wrapped display). Truncation is controlled by the option ``display.max_seq_items``, which defaults to 100; line wrapping follows the console width (``display.width``). (:issue:`6482`) + +Previous Behavior + +.. code-block:: python + + In [2]: pd.Index(range(4),name='foo') + Out[2]: Int64Index([0, 1, 2, 3], dtype='int64') + + In [3]: pd.Index(range(104),name='foo') + Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64') + + In [4]: pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern') + Out[4]: + <class 'pandas.tseries.index.DatetimeIndex'> + [2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00] + Length: 4, Freq: D, Timezone: US/Eastern + + In [5]: pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern') + Out[5]: + <class 'pandas.tseries.index.DatetimeIndex'> + [2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00] + Length: 104, Freq: D, Timezone: US/Eastern + +New Behavior + +.. 
ipython:: python + + pd.set_option('display.width',100) + pd.Index(range(4),name='foo') + pd.Index(range(25),name='foo') + pd.Index(range(104),name='foo') + pd.Index(['datetime', 'sA', 'sB', 'sC', 'flow', 'error', 'temp', 'ref', 'a_bit_a_longer_one']*2) + pd.CategoricalIndex(['a','bb','ccc','dddd'],ordered=True,name='foobar') + pd.CategoricalIndex(['a','bb','ccc','dddd']*10,ordered=True,name='foobar') + pd.CategoricalIndex(['a','bb','ccc','dddd']*100,ordered=True,name='foobar') + pd.CategoricalIndex(np.arange(1000),ordered=True,name='foobar') + pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern') + pd.date_range('20130101',periods=25,name='foo',tz='US/Eastern') + pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern') + .. _whatsnew_0161.deprecations: Deprecations diff --git a/pandas/core/common.py b/pandas/core/common.py index 8535cf1566e2d..3c92300d1f9a5 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -3132,7 +3132,7 @@ def in_ipython_frontend(): # working with straight ascii. -def _pprint_seq(seq, _nest_lvl=0, **kwds): +def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. 
@@ -3144,12 +3144,15 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds): else: fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)") - nitems = get_option("max_seq_items") or len(seq) + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) s = iter(seq) r = [] for i in range(min(nitems, len(seq))): # handle sets, no slicing - r.append(pprint_thing(next(s), _nest_lvl + 1, **kwds)) + r.append(pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)) body = ", ".join(r) if nitems < len(seq): @@ -3160,7 +3163,7 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds): return fmt % body -def _pprint_dict(seq, _nest_lvl=0, **kwds): +def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. @@ -3170,11 +3173,14 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds): pfmt = u("%s: %s") - nitems = get_option("max_seq_items") or len(seq) + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) for k, v in list(seq.items())[:nitems]: - pairs.append(pfmt % (pprint_thing(k, _nest_lvl + 1, **kwds), - pprint_thing(v, _nest_lvl + 1, **kwds))) + pairs.append(pfmt % (pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), + pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds))) if nitems < len(seq): return fmt % (", ".join(pairs) + ", ...") @@ -3183,7 +3189,7 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds): def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, - quote_strings=False): + quote_strings=False, max_seq_items=None): """ This function is the sanctioned way of converting objects to a unicode representation. 
@@ -3202,6 +3208,8 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, replacements default_escapes : bool, default False Whether the input escape characters replaces or adds to the defaults + max_seq_items : False, int, default None + Pass thru to other pretty printers to limit sequence printing Returns ------- @@ -3240,11 +3248,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): return compat.text_type(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): - result = _pprint_dict(thing, _nest_lvl, quote_strings=True) + result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items) elif is_sequence(thing) and _nest_lvl < \ get_option("display.pprint_nest_depth"): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, - quote_strings=quote_strings) + quote_strings=quote_strings, max_seq_items=max_seq_items) elif isinstance(thing, compat.string_types) and quote_strings: if compat.PY3: fmt = "'%s'" diff --git a/pandas/core/index.py b/pandas/core/index.py index 9ee040ed30ccd..21f1fed2cd6da 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -8,6 +8,7 @@ from pandas import compat import numpy as np +from math import ceil from sys import getsizeof import pandas.tslib as tslib import pandas.lib as lib @@ -26,8 +27,10 @@ from pandas.io.common import PerformanceWarning # simplify -default_pprint = lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n'), - quote_strings=True) +default_pprint = lambda x, max_seq_items=None: com.pprint_thing(x, + escape_chars=('\t', '\r', '\n'), + quote_strings=True, + max_seq_items=max_seq_items) __all__ = ['Index'] @@ -392,8 +395,150 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
""" - prepr = default_pprint(self) - return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) + klass = self.__class__.__name__ + data = self._format_data() + attrs = self._format_attrs() + space = self._format_space() + + prepr = (u(",%s") % space).join([u("%s=%s") % (k, v) + for k, v in attrs]) + + # no data provided, just attributes + if data is None: + data = '' + + res = u("%s(%s%s)") % (klass, + data, + prepr) + + return res + + def _format_space(self): + + # using space here controls if the attributes + # are line separated or not (the default) + + #max_seq_items = get_option('display.max_seq_items') + #if len(self) > max_seq_items: + # space = "\n%s" % (' ' * (len(klass) + 1)) + return " " + + @property + def _formatter_func(self): + """ + Return the formatted data as a unicode string + """ + return default_pprint + + def _format_data(self): + """ + Return the formatted data as a unicode string + """ + from pandas.core.format import get_console_size + display_width, _ = get_console_size() + if display_width is None: + display_width = get_option('display.width') or 80 + + space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) + space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2)) + + n = len(self) + sep = ',' + max_seq_items = get_option('display.max_seq_items') + formatter = self._formatter_func + + # do we want to justify (only do so for non-objects) + is_justify = not (self.inferred_type == 'string' or self.inferred_type == 'categorical' and is_object_dtype(self.categories)) + + # are we a truncated display + is_truncated = n > max_seq_items + + def _extend_line(s, line, value, display_width, next_line_prefix): + + if len(line.rstrip()) + len(value.rstrip()) >= display_width: + s += line.rstrip() + line = next_line_prefix + line += value + return s, line + + def best_len(values): + if values: + return max([len(x) for x in values]) + else: + return 0 + + if n == 0: + summary = '[], ' + elif n == 1: + first = formatter(self[0]) + 
summary = '[%s], ' % first + elif n == 2: + first = formatter(self[0]) + last = formatter(self[-1]) + summary = '[%s, %s], ' % (first, last) + else: + + if n > max_seq_items: + n = min(max_seq_items//2,10) + head = [ formatter(x) for x in self[:n] ] + tail = [ formatter(x) for x in self[-n:] ] + else: + head = [] + tail = [ formatter(x) for x in self ] + + # adjust all values to max length if needed + if is_justify: + + # however, if we are not truncated and we are only a single line, then don't justify + if is_truncated or not (len(', '.join(head)) < display_width and len(', '.join(tail)) < display_width): + max_len = max(best_len(head), best_len(tail)) + head = [x.rjust(max_len) for x in head] + tail = [x.rjust(max_len) for x in tail] + + summary = "" + line = space2 + + for i in range(len(head)): + word = head[i] + sep + ' ' + summary, line = _extend_line(summary, line, word, + display_width, space2) + if is_truncated: + summary += line + space2 + '...' + line = space2 + + for i in range(len(tail)-1): + word = tail[i] + sep + ' ' + summary, line = _extend_line(summary, line, word, + display_width, space2) + + # last value: no sep added + 1 space of width used for trailing ',' + summary, line = _extend_line(summary, line, tail[-1], + display_width - 2, space2) + summary += line + summary += '],' + + if len(summary) > (display_width): + summary += space1 + else: # one row + summary += ' ' + + # remove initial space + summary = '[' + summary[len(space2):] + + return summary + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value) + """ + attrs = [] + attrs.append(('dtype',"'%s'" % self.dtype)) + if self.name is not None: + attrs.append(('name',default_pprint(self.name))) + max_seq_items = get_option('display.max_seq_items') + if len(self) > max_seq_items: + attrs.append(('length',len(self))) + return attrs def to_series(self, **kwargs): """ @@ -2800,32 +2945,21 @@ def equals(self, other): return False - def __unicode__(self): + def 
_format_attrs(self): """ - Return a string representation for this object. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. + Return a list of tuples of the (attr,formatted_value) """ - - # currently doesn't use the display.max_categories, or display.max_seq_len - # for head/tail printing - values = default_pprint(self.values.get_values()) - cats = default_pprint(self.categories.get_values()) - space = ' ' * (len(self.__class__.__name__) + 1) - name = self.name - if name is not None: - name = default_pprint(name) - - result = u("{klass}({values},\n{space}categories={categories},\n{space}ordered={ordered},\n{space}name={name})").format( - klass=self.__class__.__name__, - values=values, - categories=cats, - ordered=self.ordered, - name=name, - space=space) - - return result + max_categories = (10 if get_option("display.max_categories") == 0 + else get_option("display.max_categories")) + attrs = [('categories', default_pprint(self.categories, max_seq_items=max_categories)), + ('ordered',self.ordered)] + if self.name is not None: + attrs.append(('name',default_pprint(self.name))) + attrs.append(('dtype',"'%s'" % self.dtype)) + max_seq_items = get_option('display.max_seq_items') + if len(self) > max_seq_items: + attrs.append(('length',len(self))) + return attrs @property def inferred_type(self): @@ -3877,40 +4011,24 @@ def nbytes(self): names_nbytes = sum(( getsizeof(i) for i in self.names )) return level_nbytes + label_nbytes + names_nbytes - def __repr__(self): - encoding = get_option('display.encoding') - attrs = [('levels', default_pprint(self.levels)), - ('labels', default_pprint(self.labels))] + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value) + """ + attrs = [('levels', default_pprint(self._levels, max_seq_items=False)), + ('labels', default_pprint(self._labels, max_seq_items=False))] if not all(name is None for name in self.names): attrs.append(('names', default_pprint(self.names))) if 
self.sortorder is not None: attrs.append(('sortorder', default_pprint(self.sortorder))) + return attrs - space = ' ' * (len(self.__class__.__name__) + 1) - prepr = (u(",\n%s") % space).join([u("%s=%s") % (k, v) - for k, v in attrs]) - res = u("%s(%s)") % (self.__class__.__name__, prepr) + def _format_space(self): + return "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) - if not compat.PY3: - # needs to be str in Python 2 - res = res.encode(encoding) - return res - - def __unicode__(self): - """ - Return a string representation for a particular Index - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. - """ - rows = self.format(names=True) - max_rows = get_option('display.max_rows') - if len(rows) > max_rows: - spaces = (len(rows[0]) - 3) // 2 - centered = ' ' * spaces - half = max_rows // 2 - rows = rows[:half] + [centered + '...' + centered] + rows[-half:] - return "\n".join(rows) + def _format_data(self): + # we are formatting thru the attributes + return None def __len__(self): return len(self.labels[0]) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index f0afef71b3381..fd9d9546ba235 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -3215,13 +3215,13 @@ def test_date_explict_date_format(self): class TestDatetimeIndexUnicode(tm.TestCase): def test_dates(self): text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1)])) - self.assertTrue("[2013-01-01," in text) - self.assertTrue(", 2014-01-01]" in text) + self.assertTrue("['2013-01-01'," in text) + self.assertTrue(", '2014-01-01']" in text) def test_mixed(self): text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)])) - self.assertTrue("[2013-01-01 00:00:00," in text) - self.assertTrue(", 2014-01-01 00:00:00]" in text) + self.assertTrue("'2013-01-01 00:00:00'," in text) + self.assertTrue("'2014-01-01 00:00:00']" in text) class TestStringRepTimestamp(tm.TestCase): diff --git 
a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 0c8c8be5217c3..444aa2a0bab1e 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -120,6 +120,19 @@ def test_ndarray_compat_properties(self): idx.nbytes idx.values.nbytes + def test_repr_roundtrip(self): + + idx = self.create_index() + tm.assert_index_equal(eval(repr(idx)),idx) + + def test_str(self): + + # test the string repr + idx = self.create_index() + idx.name = 'foo' + self.assertTrue("'foo'" in str(idx)) + self.assertTrue(idx.__class__.__name__ in str(idx)) + def test_wrong_number_names(self): def testit(ind): ind.names = ["apple", "banana", "carrot"] @@ -1699,7 +1712,7 @@ def test_get_indexer(self): self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='backfill')) self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='nearest')) - def test_repr(self): + def test_repr_roundtrip(self): ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) str(ci) @@ -1712,9 +1725,12 @@ def test_repr(self): compat.text_type(ci) # long format + # this is not reprable ci = CategoricalIndex(np.random.randint(0,5,size=100)) - result = str(ci) - tm.assert_index_equal(eval(repr(ci)),ci,exact=True) + if compat.PY3: + str(ci) + else: + compat.text_type(ci) def test_isin(self): @@ -2448,7 +2464,7 @@ def test_print_unicode_columns(self): def test_repr_summary(self): with cf.option_context('display.max_seq_items', 10): r = repr(pd.Index(np.arange(1000))) - self.assertTrue(len(r) < 100) + self.assertTrue(len(r) < 200) self.assertTrue("..." 
in r) def test_repr_roundtrip(self): @@ -2475,6 +2491,23 @@ def test_slice_keep_name(self): class DatetimeLike(Base): + def test_str(self): + + # test the string repr + idx = self.create_index() + idx.name = 'foo' + self.assertFalse("length=%s" % len(idx) in str(idx)) + self.assertTrue("'foo'" in str(idx)) + self.assertTrue(idx.__class__.__name__ in str(idx)) + + if hasattr(idx,'tz'): + if idx.tz is not None: + self.assertTrue("tz='%s'" % idx.tz in str(idx)) + else: + self.assertTrue("tz=None" in str(idx)) + if hasattr(idx,'freq'): + self.assertTrue("freq='%s'" % idx.freqstr in str(idx)) + def test_view(self): super(DatetimeLike, self).test_view() @@ -4389,7 +4422,25 @@ def test_repr_with_unicode_data(self): self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped def test_repr_roundtrip(self): - tm.assert_index_equal(eval(repr(self.index)), self.index) + + mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second']) + str(mi) + tm.assert_index_equal(eval(repr(mi)),mi,exact=True) + + # formatting + if compat.PY3: + str(mi) + else: + compat.text_type(mi) + + # long format + mi = MultiIndex.from_product([list('abcdefg'),range(10)],names=['first','second']) + result = str(mi) + tm.assert_index_equal(eval(repr(mi)),mi,exact=True) + + def test_str(self): + # tested elsewhere + pass def test_unicode_string_with_unicode(self): d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 5f3130bd2dd9c..f3a7aa0bfa4c6 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -17,6 +17,7 @@ infer_freq, to_offset, get_period_alias, Resolution) import pandas.algos as _algos +from pandas.core.config import get_option class DatetimeIndexOpsMixin(object): """ common ops mixin to support a unified inteface datetimelike Index """ @@ -79,9 +80,9 @@ def freqstr(self): @cache_readonly def inferred_freq(self): - """ - Trys to return a string representing a frequency guess, - 
generated by infer_freq. Returns None if it can't autodetect the + """ + Trys to return a string representing a frequency guess, + generated by infer_freq. Returns None if it can't autodetect the frequency. """ try: @@ -260,35 +261,25 @@ def argmax(self, axis=None): @property def _formatter_func(self): - """ - Format function to convert value to representation - """ - return str - - def _format_footer(self): raise AbstractMethodError(self) - def __unicode__(self): - formatter = self._formatter_func - summary = str(self.__class__) + '\n' - - n = len(self) - if n == 0: - pass - elif n == 1: - first = formatter(self[0]) - summary += '[%s]\n' % first - elif n == 2: - first = formatter(self[0]) - last = formatter(self[-1]) - summary += '[%s, %s]\n' % (first, last) - else: - first = formatter(self[0]) - last = formatter(self[-1]) - summary += '[%s, ..., %s]\n' % (first, last) - - summary += self._format_footer() - return summary + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value) + """ + attrs = super(DatetimeIndexOpsMixin, self)._format_attrs() + for attrib in self._attributes: + if attrib == 'freq': + freq = self.freqstr + if freq is not None: + freq = "'%s'" % freq + attrs.append(('freq',freq)) + elif attrib == 'tz': + tz = self.tz + if tz is not None: + tz = "'%s'" % tz + attrs.append(('tz',tz)) + return attrs @cache_readonly def _resolution(self): @@ -510,4 +501,6 @@ def summary(self, name=None): if self.freq: result += '\nFreq: %s' % self.freqstr + # display as values, not quoted + result = result.replace("'","") return result diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 7b0ff578b0d90..f56b40a70d551 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -597,7 +597,7 @@ def _is_dates_only(self): def _formatter_func(self): from pandas.core.format import _get_format_datetime64 formatter = _get_format_datetime64(is_dates_only=self._is_dates_only) - return lambda x: formatter(x, tz=self.tz) + 
return lambda x: "'%s'" % formatter(x, tz=self.tz) def __reduce__(self): @@ -684,10 +684,6 @@ def _format_native_types(self, na_rep=u('NaT'), def to_datetime(self, dayfirst=False): return self.copy() - def _format_footer(self): - tagline = 'Length: %d, Freq: %s, Timezone: %s' - return tagline % (len(self), self.freqstr, self.tz) - def astype(self, dtype): dtype = np.dtype(dtype) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index a4b754f5a6bbd..d57ca8d3aa7b6 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -293,6 +293,10 @@ def _to_embed(self, keep_tz=False): """ return an array repr of this object, potentially casting to object """ return self.asobject.values + @property + def _formatter_func(self): + return lambda x: "'%s'" % x + def asof_locs(self, where, mask): """ where : array of timestamps @@ -697,10 +701,6 @@ def __array_finalize__(self, obj): self.name = getattr(obj, 'name', None) self._reset_identity() - def _format_footer(self): - tagline = 'Length: %d, Freq: %s' - return tagline % (len(self), self.freqstr) - def take(self, indices, axis=None): """ Analogous to ndarray.take diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 86c427682c553..80475fc8426db 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -274,10 +274,6 @@ def _formatter_func(self): from pandas.core.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) - def _format_footer(self): - tagline = 'Length: %d, Freq: %s' - return tagline % (len(self), self.freqstr) - def __setstate__(self, state): """Necessary for making this object picklable""" if isinstance(state, dict): diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 677173e9a75e9..d1b986e7a7a1c 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -119,29 +119,24 @@ def test_representation(self): idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], 
tz='US/Eastern') - exp1 = """ -Length: 0, Freq: D, Timezone: None""" - exp2 = """ -[2011-01-01] -Length: 1, Freq: D, Timezone: None""" - exp3 = """ -[2011-01-01, 2011-01-02] -Length: 2, Freq: D, Timezone: None""" - exp4 = """ -[2011-01-01, ..., 2011-01-03] -Length: 3, Freq: D, Timezone: None""" - exp5 = """ -[2011-01-01 09:00:00+09:00, ..., 2011-01-01 11:00:00+09:00] -Length: 3, Freq: H, Timezone: Asia/Tokyo""" - exp6 = """ -[2011-01-01 09:00:00-05:00, ..., NaT] -Length: 3, Freq: None, Timezone: US/Eastern""" + exp1 = """DatetimeIndex([], dtype='datetime64[ns]', freq='D', tz=None)""" - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], - [exp1, exp2, exp3, exp4, exp5, exp6]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - self.assertEqual(result, expected) + exp2 = """DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D', tz=None)""" + + exp3 = """DatetimeIndex(['2011-01-01', '2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)""" + + exp4 = """DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)""" + + exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00', '2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')""" + + exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', 'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')""" + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], + [exp1, exp2, exp3, exp4, exp5, exp6]): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(idx, func)() + self.assertEqual(result, expected) def test_summary(self): # GH9116 @@ -372,27 +367,22 @@ def test_representation(self): idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', 
freq='D')""" - exp1 = """ -Length: 0, Freq: D""" - exp2 = """ -['1 days'] -Length: 1, Freq: D""" - exp3 = """ -['1 days', '2 days'] -Length: 2, Freq: D""" - exp4 = """ -['1 days', ..., '3 days'] -Length: 3, Freq: D""" - exp5 = """ -['1 days 00:00:01', ..., '3 days 00:00:00'] -Length: 3, Freq: None""" + exp2 = """TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')""" - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - self.assertEqual(result, expected) + exp3 = """TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')""" + + exp4 = """TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq='D')""" + + exp5 = """TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', '3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)""" + + with pd.option_context('display.width',300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(idx, func)() + self.assertEqual(result, expected) def test_summary(self): # GH9116 @@ -404,13 +394,13 @@ def test_summary(self): exp1 = """TimedeltaIndex: 0 entries Freq: D""" - exp2 = """TimedeltaIndex: 1 entries, '1 days' to '1 days' + exp2 = """TimedeltaIndex: 1 entries, 1 days to 1 days Freq: D""" - exp3 = """TimedeltaIndex: 2 entries, '1 days' to '2 days' + exp3 = """TimedeltaIndex: 2 entries, 1 days to 2 days Freq: D""" - exp4 = """TimedeltaIndex: 3 entries, '1 days' to '3 days' + exp4 = """TimedeltaIndex: 3 entries, 1 days to 3 days Freq: D""" - exp5 = """TimedeltaIndex: 3 entries, '1 days 00:00:01' to '3 days 00:00:00'""" + exp5 = """TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days 00:00:00""" for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5]): @@ -842,32 +832,23 @@ def test_representation(self): idx8 = 
pd.period_range('2013Q1', periods=2, freq="Q") idx9 = pd.period_range('2013Q1', periods=3, freq="Q") - exp1 = """ -Length: 0, Freq: D""" - exp2 = """ -[2011-01-01] -Length: 1, Freq: D""" - exp3 = """ -[2011-01-01, 2011-01-02] -Length: 2, Freq: D""" - exp4 = """ -[2011-01-01, ..., 2011-01-03] -Length: 3, Freq: D""" - exp5 = """ -[2011, ..., 2013] -Length: 3, Freq: A-DEC""" - exp6 = """ -[2011-01-01 09:00, ..., NaT] -Length: 3, Freq: H""" - exp7 = """ -[2013Q1] -Length: 1, Freq: Q-DEC""" - exp8 = """ -[2013Q1, 2013Q2] -Length: 2, Freq: Q-DEC""" - exp9 = """ -[2013Q1, ..., 2013Q3] -Length: 3, Freq: Q-DEC""" + exp1 = """PeriodIndex([], dtype='int64', freq='D')""" + + exp2 = """PeriodIndex(['2011-01-01'], dtype='int64', freq='D')""" + + exp3 = """PeriodIndex(['2011-01-01', '2011-01-02'], dtype='int64', freq='D')""" + + exp4 = """PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='int64', freq='D')""" + + exp5 = """PeriodIndex(['2011', '2012', '2013'], dtype='int64', freq='A-DEC')""" + + exp6 = """PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], dtype='int64', freq='H')""" + + exp7 = """PeriodIndex(['2013Q1'], dtype='int64', freq='Q-DEC')""" + + exp8 = """PeriodIndex(['2013Q1', '2013Q2'], dtype='int64', freq='Q-DEC')""" + + exp9 = """PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], dtype='int64', freq='Q-DEC')""" for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9], [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 5c50b952f8109..55f95b602779f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1627,7 +1627,7 @@ class _AssertRaisesContextmanager(object): def __init__(self, exception, regexp=None, *args, **kwargs): self.exception = exception if regexp is not None and not hasattr(regexp, "search"): - regexp = re.compile(regexp) + regexp = re.compile(regexp, re.DOTALL) self.regexp = regexp def __enter__(self):