From e19e7f03de81dcf7789a1373db41e353709ceb5e Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 10 Sep 2013 08:07:53 -0400 Subject: [PATCH] CLN: default for tupleize_cols is now False for both `to_csv` and `read_csv`. Fair warning in 0.12 (GH3604) --- doc/source/io.rst | 15 ++++++--------- doc/source/release.rst | 1 + pandas/core/format.py | 2 +- pandas/core/frame.py | 8 ++++---- pandas/io/parsers.py | 6 +++--- pandas/parser.pyx | 2 +- 6 files changed, 16 insertions(+), 18 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 67cbe35144461..3a284062a2ec9 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -153,7 +153,7 @@ They can take a number of arguments: time and lower memory usage. - ``mangle_dupe_cols``: boolean, default True, then duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X' - - ``tupleize_cols``: boolean, default True, if False, convert a list of tuples + - ``tupleize_cols``: boolean, default False, if False, convert a list of tuples to a multi-index of columns, otherwise, leave the column index as a list of tuples .. ipython:: python @@ -860,19 +860,16 @@ Reading columns with a ``MultiIndex`` By specifying list of row locations for the ``header`` argument, you can read in a ``MultiIndex`` for the columns. Specifying non-consecutive -rows will skip the interveaning rows. +rows will skip the intervening rows. In order to have the pre-0.13 behavior +of tupleizing columns, specify ``tupleize_cols=True``. ..
ipython:: python from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) - df.to_csv('mi.csv',tupleize_cols=False) + df.to_csv('mi.csv') print open('mi.csv').read() - pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1],tupleize_cols=False) - -Note: The default behavior in 0.12 remains unchanged (``tupleize_cols=True``) from prior versions, -but starting with 0.13, the default *to* write and read multi-index columns will be in the new -format (``tupleize_cols=False``) + pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1]) Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it with ``df.to_csv(..., index=False``), then any ``names`` on the columns index will be *lost*. @@ -966,7 +963,7 @@ function takes a number of arguments. Only the first is required. - ``sep`` : Field delimiter for the output file (default ",") - ``encoding``: a string representing the encoding to use if the contents are non-ascii, for python versions prior to 3 - - ``tupleize_cols``: boolean, default True, if False, write as a list of tuples, + - ``tupleize_cols``: boolean, default False, if True, write as a list of tuples, otherwise write in an expanded line format suitable for ``read_csv`` Writing a formatted string diff --git a/doc/source/release.rst b/doc/source/release.rst index de7aa675380b7..4f02fdbbfe97a 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -188,6 +188,7 @@ API Changes a list can be passed to ``to_replace`` (:issue:`4743`). - provide automatic dtype conversions on _reduce operations (:issue:`3371`) - exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`) + - default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``.
Fair warning in 0.12 (:issue:`3604`) Internal Refactoring ~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/format.py b/pandas/core/format.py index 6b4dc979d5279..92fcfaa5f2f9c 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -787,7 +787,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, mode='w', nanRep=None, encoding=None, quoting=None, line_terminator='\n', chunksize=None, engine=None, - tupleize_cols=True, quotechar='"'): + tupleize_cols=False, quotechar='"'): self.engine = engine # remove for 0.13 self.obj = obj diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 52d3a15d8d184..71d7f826781df 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1191,7 +1191,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, is used. Different default from read_table parse_dates : boolean, default True Parse dates. Different default from read_table - tupleize_cols : boolean, default True + tupleize_cols : boolean, default False write multi_index columns as a list of tuples (if True) or new (expanded format) if False) @@ -1208,7 +1208,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, from pandas.io.parsers import read_table return read_table(path, header=header, sep=sep, parse_dates=parse_dates, index_col=index_col, - encoding=encoding, tupleize_cols=False) + encoding=encoding, tupleize_cols=tupleize_cols) def to_sparse(self, fill_value=None, kind='block'): """ @@ -1291,7 +1291,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, mode='w', nanRep=None, encoding=None, quoting=None, line_terminator='\n', chunksize=None, - tupleize_cols=True, **kwds): + tupleize_cols=False, **kwds): r"""Write DataFrame to a comma-separated values (csv) file Parameters @@ -1331,7 +1331,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, defaults to csv.QUOTE_MINIMAL 
chunksize : int or None rows to write at a time - tupleize_cols : boolean, default True + tupleize_cols : boolean, default False write multi_index columns as a list of tuples (if True) or new (expanded format) if False) """ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index e1b09eb76415f..06940e3bb2b4c 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -247,7 +247,7 @@ def _read(filepath_or_buffer, kwds): 'squeeze': False, 'compression': None, 'mangle_dupe_cols': True, - 'tupleize_cols':True, + 'tupleize_cols':False, } @@ -336,7 +336,7 @@ def parser_f(filepath_or_buffer, encoding=None, squeeze=False, mangle_dupe_cols=True, - tupleize_cols=True, + tupleize_cols=False, ): # Alias sep -> delimiter. @@ -656,7 +656,7 @@ def __init__(self, kwds): self.na_fvalues = kwds.get('na_fvalues') self.true_values = kwds.get('true_values') self.false_values = kwds.get('false_values') - self.tupleize_cols = kwds.get('tupleize_cols',True) + self.tupleize_cols = kwds.get('tupleize_cols',False) self._date_conv = _make_date_converter(date_parser=self.date_parser, dayfirst=self.dayfirst) diff --git a/pandas/parser.pyx b/pandas/parser.pyx index 8b90e76fa4bf3..b97929023adb6 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -310,7 +310,7 @@ cdef class TextReader: skip_footer=0, verbose=False, mangle_dupe_cols=True, - tupleize_cols=True): + tupleize_cols=False): self.parser = parser_new() self.parser.chunksize = tokenize_chunksize