diff --git a/RELEASE.rst b/RELEASE.rst index 5293b858b72a3..5b512814d0fec 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -35,6 +35,7 @@ pandas 0.11.1 GH3606_) - Support for reading Amazon S3 files. (GH3504_) - Added module for reading and writing Stata files: pandas.io.stata (GH1512_) + includes ``to_stata`` DataFrame method, and a ``read_stata`` top-level reader - Added support for writing in ``to_csv`` and reading in ``read_csv``, multi-index columns. The ``header`` option in ``read_csv`` now accepts a list of the rows from which to read the index. Added the option, @@ -104,6 +105,11 @@ pandas 0.11.1 does not control triggering of summary, similar to < 0.11.0. - Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column to be inserted if ``True``, default is ``False`` (same as prior to 0.11.1) (GH3679_) + - io API changes + + - added ``pandas.io.api`` for i/o imports + - removed ``Excel`` support to ``pandas.io.excel`` + - added top-level ``pd.read_sql`` and ``to_sql`` DataFrame methods **Bug Fixes** diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 2eda474d7954f..19bacdc81bdf9 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -699,8 +699,7 @@ Reading from an excel file .. ipython:: python - xls = ExcelFile('foo.xlsx') - xls.parse('sheet1', index_col=None, na_values=['NA']) + read_excel('foo.xlsx', 'sheet1', index_col=None, na_values=['NA']) .. ipython:: python :suppress: diff --git a/doc/source/api.rst b/doc/source/api.rst index c5b83e4af6999..2e59bf6533205 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -48,7 +48,20 @@ File IO read_table read_csv - ExcelFile.parse + +.. currentmodule:: pandas.io.excel + +.. autosummary:: + :toctree: generated/ + + read_excel + +.. currentmodule:: pandas.io.stata + +.. autosummary:: + :toctree: generated/ + + read_stata .. currentmodule:: pandas.io.html @@ -57,15 +70,29 @@ File IO read_html +SQL +~~~ + +.. currentmodule:: pandas.io.sql + +.. 
autosummary:: + :toctree: generated/ + + read_sql + HDFStore: PyTables (HDF5) ~~~~~~~~~~~~~~~~~~~~~~~~~ + .. currentmodule:: pandas.io.pytables .. autosummary:: :toctree: generated/ + read_hdf HDFStore.put + HDFStore.append HDFStore.get + HDFStore.select Standard moving window functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -532,9 +559,11 @@ Serialization / IO / Conversion DataFrame.load DataFrame.save DataFrame.to_csv + DataFrame.to_hdf DataFrame.to_dict DataFrame.to_excel DataFrame.to_html + DataFrame.to_stata DataFrame.to_records DataFrame.to_sparse DataFrame.to_string diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 8aac415721f9a..7f6b54667765d 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -32,25 +32,25 @@ Selection The :ref:`indexing ` docs. -`Boolean Rows Indexing +Indexing using both row labels and conditionals, see +`here `__ - Indexing using both row labels and conditionals -`Using loc and iloc in selections +Use loc for label-oriented slicing and iloc positional slicing, see +`here `__ - Use loc for label-oriented slicing and iloc positional slicing -`Extending a panel along the minor axis +Extend a panel frame by transposing, adding a new dimension, and transposing back to the original dimensions, see +`here `__ - Extend a panel frame by transposing, adding a new dimension, and transposing back to the original dimensions -`Boolean masking in a panel +Mask a panel by using ``np.where`` and then reconstructing the panel with the new masked values +`here `__ - Mask a panel by using ``np.where`` and then reconstructing the panel with the new masked values -`Selecting via the complement +Using ``~`` to take the complement of a boolean array, see +`here `__ - ``~`` can be used to take the complement of a boolean array `Efficiently creating columns using applymap `__ diff --git a/doc/source/io.rst b/doc/source/io.rst index a1ba88c0d798b..92747f9906da2 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -9,6 +9,7 
@@ import csv from StringIO import StringIO import pandas as pd + ExcelWriter = pd.ExcelWriter import numpy as np np.random.seed(123456) @@ -27,6 +28,18 @@ IO Tools (Text, CSV, HDF5, ...) ******************************* +The Pandas I/O api is a set of top level ``reader`` functions accessed like ``pd.read_csv()`` that generally return a ``pandas`` +object. The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` + +.. csv-table:: + :widths: 12, 15, 15, 15, 15 + :delim: ; + + Reader; ``read_csv``; ``read_excel``; ``read_hdf``; ``read_sql`` + Writer; ``to_csv``; ``to_excel``; ``to_hdf``; ``to_sql`` + Reader; ``read_html``; ``read_stata``; ``read_clipboard`` ; + Writer; ``to_html``; ``to_stata``; ``to_clipboard`` ; + .. _io.read_csv_table: CSV & Text files @@ -971,29 +984,33 @@ And then import the data directly to a DataFrame by calling: Excel files ----------- -The ``ExcelFile`` class can read an Excel 2003 file using the ``xlrd`` Python +The ``read_excel`` method can read an Excel 2003 file using the ``xlrd`` Python module and use the same parsing code as the above to convert tabular data into a DataFrame. See the :ref:`cookbook` for some advanced strategies -To use it, create the ``ExcelFile`` object: +.. note:: -.. code-block:: python + The prior method of accessing Excel is now deprecated as of 0.11.1, + this will work but will be removed in a future version. - xls = ExcelFile('path_to_file.xls') + .. code-block:: python -Then use the ``parse`` instance method with a sheetname, then use the same -additional arguments as the parsers above: + from pandas.io.parsers import ExcelFile + xls = ExcelFile('path_to_file.xls') + xls.parse('Sheet1', index_col=None, na_values=['NA']) -.. code-block:: python + Replaced by + + .. 
code-block:: python - xls.parse('Sheet1', index_col=None, na_values=['NA']) + read_excel('path_to_file.xls', 'Sheet1', index_col=None, na_values=['NA']) To read sheets from an Excel 2007 file, you can pass a filename with a ``.xlsx`` extension, in which case the ``openpyxl`` module will be used to read the file. It is often the case that users will insert columns to do temporary computations -in Excel and you may not want to read in those columns. `ExcelFile.parse` takes +in Excel and you may not want to read in those columns. `read_excel` takes a `parse_cols` keyword to allow you to specify a subset of columns to parse. If `parse_cols` is an integer, then it is assumed to indicate the last column @@ -1001,14 +1018,14 @@ to be parsed. .. code-block:: python - xls.parse('Sheet1', parse_cols=2, index_col=None, na_values=['NA']) + read_excel('path_to_file.xls', 'Sheet1', parse_cols=2, index_col=None, na_values=['NA']) If `parse_cols` is a list of integers, then it is assumed to be the file column indices to be parsed. .. code-block:: python - xls.parse('Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA']) + read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA']) To write a DataFrame object to a sheet of an Excel file, you can use the ``to_excel`` instance method. The arguments are largely the same as ``to_csv`` @@ -1883,16 +1900,13 @@ Writing to STATA format .. _io.StataWriter: -The function :func:'~pandas.io.StataWriter.write_file' will write a DataFrame -into a .dta file. The format version of this file is always the latest one, -115. +The method ``to_stata`` will write a DataFrame into a .dta file. +The format version of this file is always the latest one, 115. ..
ipython:: python - from pandas.io.stata import StataWriter df = DataFrame(randn(10,2),columns=list('AB')) - writer = StataWriter('stata.dta',df) - writer.write_file() + df.to_stata('stata.dta') Reading from STATA format ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1901,24 +1915,21 @@ Reading from STATA format .. versionadded:: 0.11.1 -The class StataReader will read the header of the given dta file at -initialization. Its function :func:'~pandas.io.StataReader.data' will -read the observations, converting them to a DataFrame which is returned: +The top-level function ``read_stata`` will read a dta format file +and return a DataFrame: .. ipython:: python - from pandas.io.stata import StataReader - reader = StataReader('stata.dta') - reader.data() + pd.read_stata('stata.dta') -The parameter convert_categoricals indicates wheter value labels should be -read and used to create a Categorical variable from them. Value labels can -also be retrieved by the function variable_labels, which requires data to be -called before. +Currently the ``index`` is retrieved as a column on read back. -The StataReader supports .dta Formats 104, 105, 108, 113-115. +The parameter ``convert_categoricals`` indicates whether value labels should be +read and used to create a ``Categorical`` variable from them. Value labels can +also be retrieved by the function ``variable_labels``, which requires data to be +called before (see ``pandas.io.stata.StataReader``). -Alternatively, the function :func:'~pandas.io.read_stata' can be used +The StataReader supports .dta Formats 104, 105, 108, 113-115. .. ipython:: python :suppress: diff --git a/doc/source/v0.10.0.txt b/doc/source/v0.10.0.txt index 0c5497868efe2..51075a61bec4d 100644 --- a/doc/source/v0.10.0.txt +++ b/doc/source/v0.10.0.txt @@ -1,5 +1,10 @@ .. _whatsnew_0100: +..
ipython:: python + :suppress: + + from StringIO import StringIO + v0.10.0 (December 17, 2012) --------------------------- diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index bd4a7c49fbb4d..7cedb62693c73 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -6,6 +6,19 @@ v0.11.1 (??) This is a minor release from 0.11.0 and includes several new features and enhancements along with a large number of bug fixes. +The I/O api is now much more consistent with the following top-level reading +functions available, e.g. ``pd.read_csv``, and the counterpart writers are +available as object methods, e.g. ``df.to_csv`` + +.. csv-table:: + :widths: 12, 15, 15, 15, 15 + :delim: ; + + Reader; ``read_csv``; ``read_excel``; ``read_hdf``; ``read_sql`` + Writer; ``to_csv``; ``to_excel``; ``to_hdf``; ``to_sql`` + Reader; ``read_html``; ``read_stata``; ``read_clipboard`` ; + Writer; ``to_html``; ``to_stata``; ``to_clipboard`` ; + API changes ~~~~~~~~~~~ @@ -74,6 +87,31 @@ API changes - Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column to be inserted if ``True``, default is ``False`` (same as prior to 0.11.1) (GH3679_) + - IO api + + - added top-level function ``read_excel`` to replace the following, + The original API is deprecated and will be removed in a future version + + .. code-block:: python + + from pandas.io.parsers import ExcelFile + xls = ExcelFile('path_to_file.xls') + xls.parse('Sheet1', index_col=None, na_values=['NA']) + + With + + .. code-block:: python + + import pandas as pd + pd.read_excel('path_to_file.xls', 'Sheet1', index_col=None, na_values=['NA']) + + - added top-level function ``read_sql`` that is equivalent to the following + + .. code-block:: python + + from pandas.io.sql import read_frame + read_frame(....) + Enhancements ~~~~~~~~~~~~ @@ -109,6 +147,8 @@ Enhancements a list or tuple. 
- Added module for reading and writing Stata files: pandas.io.stata (GH1512_) + accessible via ``read_stata`` top-level function for reading, + and ``to_stata`` DataFrame method for writing - ``DataFrame.replace()`` now allows regular expressions on contained ``Series`` with object dtype. See the examples section in the regular docs @@ -218,7 +258,7 @@ Bug Fixes .. ipython :: python df = DataFrame({'a': list('ab..'), 'b': [1, 2, 3, 4]}) - df.replace(regex=r'\s*\.\s*', value=nan) + df.replace(regex=r'\s*\.\s*', value=np.nan) to replace all occurrences of the string ``'.'`` with zero or more instances of surrounding whitespace with ``NaN``. @@ -227,7 +267,7 @@ Bug Fixes .. ipython :: python - df.replace('.', nan) + df.replace('.', np.nan) to replace all occurrences of the string ``'.'`` with ``NaN``. diff --git a/pandas/__init__.py b/pandas/__init__.py index bf5bcc81bc21e..da4c146da3cfd 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -28,12 +28,8 @@ from pandas.sparse.api import * from pandas.stats.api import * from pandas.tseries.api import * +from pandas.io.api import * -from pandas.io.parsers import (read_csv, read_table, read_clipboard, - read_fwf, to_clipboard, ExcelFile, - ExcelWriter) -from pandas.io.pytables import HDFStore, Term, get_store, read_hdf -from pandas.io.html import read_html from pandas.util.testing import debug from pandas.tools.describe import value_range diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9e276e01dd723..ea8dee51565ac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1300,35 +1300,6 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=parse_dates, index_col=index_col, encoding=encoding,tupleize_cols=False) - @classmethod - def from_dta(dta, path, parse_dates=True, convert_categoricals=True, encoding=None, index_col=None): - """ - Read Stata file into DataFrame - - Parameters - ---------- - path : string file path or file handle / StringIO - parse_dates : boolean, default
True - Convert date variables to DataFrame time values - convert_categoricals : boolean, default True - Read value labels and convert columns to Categorical/Factor variables - encoding : string, None or encoding, default None - Encoding used to parse the files. Note that Stata doesn't - support unicode. None defaults to cp1252. - index_col : int or sequence, default None - Column to use for index. If a sequence is given, a MultiIndex - is used. Different default from read_table - - Notes - ----- - - Returns - ------- - y : DataFrame - """ - from pandas.io.stata import read_stata - return read_stata(path, parse_dates=parse_dates, convert_categoricals=convert_categoricals, encoding=encoding, index=index_col) - def to_sparse(self, fill_value=None, kind='block'): """ Convert to SparseDataFrame @@ -1510,7 +1481,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', >>> df2.to_excel(writer,'sheet2') >>> writer.save() """ - from pandas.io.parsers import ExcelWriter + from pandas.io.excel import ExcelWriter need_save = False if isinstance(excel_writer, basestring): excel_writer = ExcelWriter(excel_writer) @@ -1529,6 +1500,57 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', if need_save: excel_writer.save() + def to_stata(self, fname, convert_dates=None, write_index=True, encoding="latin-1", + byteorder=None): + """ + A class for writing Stata binary dta files from array-like objects + + Parameters + ---------- + fname : file path or buffer + Where to save the dta file. + convert_dates : dict + Dictionary mapping column of datetime types to the stata internal + format that you want to use for the dates. Options are + 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either a + number or a name. + encoding : str + Default is latin-1. Note that Stata does not support unicode. + byteorder : str + Can be ">", "<", "little", or "big". 
The default is None which uses + `sys.byteorder` + + Examples + -------- + >>> writer = StataWriter('./data_file.dta', data) + >>> writer.write_file() + + Or with dates + + >>> writer = StataWriter('./date_data_file.dta', data, {2 : 'tw'}) + >>> writer.write_file() + """ + from pandas.io.stata import StataWriter + writer = StataWriter(fname,self,convert_dates=convert_dates, encoding=encoding, byteorder=byteorder) + writer.write_file() + + def to_sql(self, name, con, flavor='sqlite', if_exists='fail', **kwargs): + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + name: name of SQL table + conn: an open SQL database connection object + flavor: {'sqlite', 'mysql', 'oracle'}, default 'sqlite' + if_exists: {'fail', 'replace', 'append'}, default 'fail' + fail: If table exists, do nothing. + replace: If table exists, drop it, recreate it, and insert data. + append: If table exists, insert data. Create if does not exist. + """ + from pandas.io.sql import write_frame + write_frame(self, name, con, flavor=flavor, if_exists=if_exists, **kwargs) + @Appender(fmt.docstring_to_string, indents=1) def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4a80e2f65fd71..aa574219a259e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -491,6 +491,10 @@ def to_hdf(self, path_or_buf, key, **kwargs): from pandas.io import pytables return pytables.to_hdf(path_or_buf, key, self, **kwargs) + def to_clipboard(self): + from pandas.io import parsers + parsers.to_clipboard(self) + # install the indexerse for _name, _indexer in indexing.get_indexers_list(): PandasObject._create_indexer(_name,_indexer) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index fa1305d27058e..0a099661c58f1 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -596,7 +596,7 @@ def to_excel(self, path, 
na_rep=''): na_rep : string, default '' Missing data representation """ - from pandas.io.parsers import ExcelWriter + from pandas.io.excel import ExcelWriter writer = ExcelWriter(path) for item, df in self.iteritems(): name = str(item) diff --git a/pandas/io/api.py b/pandas/io/api.py new file mode 100644 index 0000000000000..e4c0c8c0c77f0 --- /dev/null +++ b/pandas/io/api.py @@ -0,0 +1,11 @@ +""" +Data IO api +""" + +from pandas.io.parsers import (read_csv, read_table, read_clipboard, + read_fwf, to_clipboard) +from pandas.io.excel import ExcelFile, ExcelWriter, read_excel +from pandas.io.pytables import HDFStore, Term, get_store, read_hdf +from pandas.io.html import read_html +from pandas.io.sql import read_sql +from pandas.io.stata import read_stata diff --git a/pandas/io/excel.py b/pandas/io/excel.py new file mode 100644 index 0000000000000..ea102cb6803d7 --- /dev/null +++ b/pandas/io/excel.py @@ -0,0 +1,462 @@ +""" +Module parse to/from Excel +""" + +#---------------------------------------------------------------------- +# ExcelFile class + +import datetime +from itertools import izip +import numpy as np + +from pandas.core.index import Index, MultiIndex +from pandas.core.frame import DataFrame +import pandas.core.common as com +from pandas.util import py3compat +from pandas.io.parsers import TextParser +from pandas.tseries.period import Period +import json + +def read_excel(path_or_buf, sheetname, header=0, skiprows=None, skip_footer=0, + index_col=None, parse_cols=None, parse_dates=False, + date_parser=None, na_values=None, thousands=None, chunksize=None, + kind=None, **kwds): + """ + Read Excel table into DataFrame + + Parameters + ---------- + sheetname : string + Name of Excel sheet + header : int, default 0 + Row to use for the column labels of the parsed DataFrame + skiprows : list-like + Rows to skip at the beginning (0-indexed) + skip_footer : int, default 0 + Rows at the end to skip (0-indexed) + index_col : int, default None + Column to use as the 
row labels of the DataFrame. Pass None if + there is no such column + parse_cols : int or list, default None + If None then parse all columns, + If int then indicates last column to be parsed + If list of ints then indicates list of column numbers to be parsed + If string then indicates comma separated list of column names and + column ranges (e.g. "A:E" or "A,C,E:F") + na_values : list-like, default None + List of additional strings to recognize as NA/NaN + + Returns + ------- + parsed : DataFrame + """ + return ExcelFile(path_or_buf,kind=kind).parse(sheetname=sheetname, + header=0, skiprows=None, skip_footer=0, + index_col=None, parse_cols=None, parse_dates=False, + date_parser=None, na_values=None, thousands=None, chunksize=None, + kind=None, **kwds) + +class ExcelFile(object): + """ + Class for parsing tabular excel sheets into DataFrame objects. + Uses xlrd. See ExcelFile.parse for more documentation + + Parameters + ---------- + path : string or file-like object + Path to xls or xlsx file + """ + def __init__(self, path_or_buf, kind=None, **kwds): + self.kind = kind + + import xlrd # throw an ImportError if we need to + ver = tuple(map(int,xlrd.__VERSION__.split(".")[:2])) + if ver < (0, 9): + raise ImportError("pandas requires xlrd >= 0.9.0 for excel support, current version "+xlrd.__VERSION__) + + self.path_or_buf = path_or_buf + self.tmpfile = None + + if isinstance(path_or_buf, basestring): + self.book = xlrd.open_workbook(path_or_buf) + else: + data = path_or_buf.read() + self.book = xlrd.open_workbook(file_contents=data) + + def __repr__(self): + return object.__repr__(self) + + def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, + index_col=None, parse_cols=None, parse_dates=False, + date_parser=None, na_values=None, thousands=None, chunksize=None, + **kwds): + """ + Read Excel table into DataFrame + + Parameters + ---------- + sheetname : string + Name of Excel sheet + header : int, default 0 + Row to use for the column labels of the 
parsed DataFrame + skiprows : list-like + Rows to skip at the beginning (0-indexed) + skip_footer : int, default 0 + Rows at the end to skip (0-indexed) + index_col : int, default None + Column to use as the row labels of the DataFrame. Pass None if + there is no such column + parse_cols : int or list, default None + If None then parse all columns, + If int then indicates last column to be parsed + If list of ints then indicates list of column numbers to be parsed + If string then indicates comma separated list of column names and + column ranges (e.g. "A:E" or "A,C,E:F") + na_values : list-like, default None + List of additional strings to recognize as NA/NaN + + Returns + ------- + parsed : DataFrame + """ + + # has_index_names: boolean, default False + # True if the cols defined in index_col have an index name and are + # not in the header + has_index_names = False # removed as new argument of API function + + skipfooter = kwds.pop('skipfooter', None) + if skipfooter is not None: + skip_footer = skipfooter + + return self._parse_excel(sheetname, header=header, + skiprows=skiprows, index_col=index_col, + has_index_names=has_index_names, + parse_cols=parse_cols, + parse_dates=parse_dates, + date_parser=date_parser, + na_values=na_values, + thousands=thousands, + chunksize=chunksize, + skip_footer=skip_footer) + + def _should_parse(self, i, parse_cols): + + def _range2cols(areas): + """ + Convert comma separated list of column names and column ranges to a + list of 0-based column indexes. 
+ + >>> _range2cols('A:E') + [0, 1, 2, 3, 4] + >>> _range2cols('A,C,Z:AB') + [0, 2, 25, 26, 27] + """ + def _excel2num(x): + "Convert Excel column name like 'AB' to 0-based column index" + return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, x.upper().strip(), 0) - 1 + + cols = [] + for rng in areas.split(','): + if ':' in rng: + rng = rng.split(':') + cols += range(_excel2num(rng[0]), _excel2num(rng[1]) + 1) + else: + cols.append(_excel2num(rng)) + return cols + + if isinstance(parse_cols, int): + return i <= parse_cols + elif isinstance(parse_cols, basestring): + return i in _range2cols(parse_cols) + else: + return i in parse_cols + + def _parse_excel(self, sheetname, header=0, skiprows=None, + skip_footer=0, index_col=None, has_index_names=None, + parse_cols=None, parse_dates=False, date_parser=None, + na_values=None, thousands=None, chunksize=None): + from xlrd import (xldate_as_tuple, XL_CELL_DATE, + XL_CELL_ERROR, XL_CELL_BOOLEAN) + + datemode = self.book.datemode + sheet = self.book.sheet_by_name(sheetname) + + data = [] + should_parse = {} + for i in range(sheet.nrows): + row = [] + for j, (value, typ) in enumerate(izip(sheet.row_values(i), + sheet.row_types(i))): + if parse_cols is not None and j not in should_parse: + should_parse[j] = self._should_parse(j, parse_cols) + + if parse_cols is None or should_parse[j]: + if typ == XL_CELL_DATE: + dt = xldate_as_tuple(value, datemode) + # how to produce this first case? 
+ if dt[0] < datetime.MINYEAR: # pragma: no cover + value = datetime.time(*dt[3:]) + else: + value = datetime.datetime(*dt) + elif typ == XL_CELL_ERROR: + value = np.nan + elif typ == XL_CELL_BOOLEAN: + value = bool(value) + row.append(value) + + data.append(row) + + if header is not None: + data[header] = _trim_excel_header(data[header]) + + parser = TextParser(data, header=header, index_col=index_col, + has_index_names=has_index_names, + na_values=na_values, + thousands=thousands, + parse_dates=parse_dates, + date_parser=date_parser, + skiprows=skiprows, + skip_footer=skip_footer, + chunksize=chunksize) + + return parser.read() + + @property + def sheet_names(self): + return self.book.sheet_names() + + +def _trim_excel_header(row): + # trim header row so auto-index inference works + # xlrd uses '' , openpyxl None + while len(row) > 0 and (row[0] == '' or row[0] is None): + row = row[1:] + return row + + +class CellStyleConverter(object): + """ + Utility Class which converts a style dict to xlrd or openpyxl style + """ + + @staticmethod + def to_xls(style_dict, num_format_str=None): + """ + converts a style_dict to an xlwt style object + Parameters + ---------- + style_dict: style dictionary to convert + """ + import xlwt + + def style_to_xlwt(item, firstlevel=True, field_sep=',', line_sep=';'): + """helper wich recursively generate an xlwt easy style string + for example: + + hstyle = {"font": {"bold": True}, + "border": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "align": {"horiz": "center"}} + will be converted to + font: bold on; \ + border: top thin, right thin, bottom thin, left thin; \ + align: horiz center; + """ + if hasattr(item, 'items'): + if firstlevel: + it = ["%s: %s" % (key, style_to_xlwt(value, False)) + for key, value in item.items()] + out = "%s " % (line_sep).join(it) + return out + else: + it = ["%s %s" % (key, style_to_xlwt(value, False)) + for key, value in item.items()] + out = "%s " % (field_sep).join(it) + 
return out + else: + item = "%s" % item + item = item.replace("True", "on") + item = item.replace("False", "off") + return item + + if style_dict: + xlwt_stylestr = style_to_xlwt(style_dict) + style = xlwt.easyxf(xlwt_stylestr, field_sep=',', line_sep=';') + else: + style = xlwt.XFStyle() + if num_format_str is not None: + style.num_format_str = num_format_str + + return style + + @staticmethod + def to_xlsx(style_dict): + """ + converts a style_dict to an openpyxl style object + Parameters + ---------- + style_dict: style dictionary to convert + """ + + from openpyxl.style import Style + xls_style = Style() + for key, value in style_dict.items(): + for nk, nv in value.items(): + if key == "borders": + (xls_style.borders.__getattribute__(nk) + .__setattr__('border_style', nv)) + else: + xls_style.__getattribute__(key).__setattr__(nk, nv) + + return xls_style + + +def _conv_value(val): + # convert value for excel dump + if isinstance(val, np.int64): + val = int(val) + elif isinstance(val, np.bool8): + val = bool(val) + elif isinstance(val, Period): + val = "%s" % val + + return val + + +class ExcelWriter(object): + """ + Class for writing DataFrame objects into excel sheets, uses xlwt for xls, + openpyxl for xlsx. See DataFrame.to_excel for typical usage. 
+ + Parameters + ---------- + path : string + Path to xls file + """ + def __init__(self, path): + self.use_xlsx = True + if path.endswith('.xls'): + self.use_xlsx = False + import xlwt + self.book = xlwt.Workbook() + self.fm_datetime = xlwt.easyxf( + num_format_str='YYYY-MM-DD HH:MM:SS') + self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') + else: + from openpyxl.workbook import Workbook + self.book = Workbook() # optimized_write=True) + # open pyxl 1.6.1 adds a dummy sheet remove it + if self.book.worksheets: + self.book.remove_sheet(self.book.worksheets[0]) + self.path = path + self.sheets = {} + self.cur_sheet = None + + def save(self): + """ + Save workbook to disk + """ + self.book.save(self.path) + + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + """ + Write given formated cells into Excel an excel sheet + + Parameters + ---------- + cells : generator + cell of formated data to save to Excel sheet + sheet_name : string, default None + Name of Excel sheet, if None, then use self.cur_sheet + startrow: upper left cell row to dump data frame + startcol: upper left cell column to dump data frame + """ + if sheet_name is None: + sheet_name = self.cur_sheet + if sheet_name is None: # pragma: no cover + raise Exception('Must pass explicit sheet_name or set ' + 'cur_sheet property') + if self.use_xlsx: + self._writecells_xlsx(cells, sheet_name, startrow, startcol) + else: + self._writecells_xls(cells, sheet_name, startrow, startcol) + + def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): + + from openpyxl.cell import get_column_letter + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.create_sheet() + wks.title = sheet_name + self.sheets[sheet_name] = wks + + for cell in cells: + colletter = get_column_letter(startcol + cell.col + 1) + xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1)) + xcell.value = _conv_value(cell.val) + if cell.style: + style = 
CellStyleConverter.to_xlsx(cell.style) + for field in style.__fields__: + xcell.style.__setattr__(field, + style.__getattribute__(field)) + + if isinstance(cell.val, datetime.datetime): + xcell.style.number_format.format_code = "YYYY-MM-DD HH:MM:SS" + elif isinstance(cell.val, datetime.date): + xcell.style.number_format.format_code = "YYYY-MM-DD" + + # merging requires openpyxl latest (works on 1.6.1) + # todo add version check + if cell.mergestart is not None and cell.mergeend is not None: + cletterstart = get_column_letter(startcol + cell.col + 1) + cletterend = get_column_letter(startcol + cell.mergeend + 1) + + wks.merge_cells('%s%s:%s%s' % (cletterstart, + startrow + cell.row + 1, + cletterend, + startrow + cell.mergestart + 1)) + + def _writecells_xls(self, cells, sheet_name, startrow, startcol): + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.add_sheet(sheet_name) + self.sheets[sheet_name] = wks + + style_dict = {} + + for cell in cells: + val = _conv_value(cell.val) + + num_format_str = None + if isinstance(cell.val, datetime.datetime): + num_format_str = "YYYY-MM-DD HH:MM:SS" + if isinstance(cell.val, datetime.date): + num_format_str = "YYYY-MM-DD" + + stylekey = json.dumps(cell.style) + if num_format_str: + stylekey += num_format_str + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = CellStyleConverter.to_xls(cell.style, num_format_str) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.write_merge(startrow + cell.row, + startrow + cell.mergestart, + startcol + cell.col, + startcol + cell.mergeend, + val, style) + else: + wks.write(startrow + cell.row, + startcol + cell.col, + val, style) + diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 0dde47e6065e4..249afe0755445 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1981,409 +1981,17 @@ def _make_reader(self, f): self.data = FixedWidthReader(f, 
self.colspecs, self.delimiter) -#---------------------------------------------------------------------- -# ExcelFile class - -class ExcelFile(object): - """ - Class for parsing tabular excel sheets into DataFrame objects. - Uses xlrd. See ExcelFile.parse for more documentation - - Parameters - ---------- - path : string or file-like object - Path to xls or xlsx file - """ - def __init__(self, path_or_buf, kind=None, **kwds): - self.kind = kind - - import xlrd # throw an ImportError if we need to - ver = tuple(map(int,xlrd.__VERSION__.split(".")[:2])) - if ver < (0, 9): - raise ImportError("pandas requires xlrd >= 0.9.0 for excel support, current version "+xlrd.__VERSION__) - - self.path_or_buf = path_or_buf - self.tmpfile = None - - if isinstance(path_or_buf, basestring): - self.book = xlrd.open_workbook(path_or_buf) - else: - data = path_or_buf.read() - self.book = xlrd.open_workbook(file_contents=data) - - def __repr__(self): - return object.__repr__(self) - - def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, - index_col=None, parse_cols=None, parse_dates=False, - date_parser=None, na_values=None, thousands=None, chunksize=None, - **kwds): - """ - Read Excel table into DataFrame - - Parameters - ---------- - sheetname : string - Name of Excel sheet - header : int, default 0 - Row to use for the column labels of the parsed DataFrame - skiprows : list-like - Rows to skip at the beginning (0-indexed) - skip_footer : int, default 0 - Rows at the end to skip (0-indexed) - index_col : int, default None - Column to use as the row labels of the DataFrame. Pass None if - there is no such column - parse_cols : int or list, default None - If None then parse all columns, - If int then indicates last column to be parsed - If list of ints then indicates list of column numbers to be parsed - If string then indicates comma separated list of column names and - column ranges (e.g. 
"A:E" or "A,C,E:F") - na_values : list-like, default None - List of additional strings to recognize as NA/NaN - - Returns - ------- - parsed : DataFrame - """ - - # has_index_names: boolean, default False - # True if the cols defined in index_col have an index name and are - # not in the header - has_index_names = False # removed as new argument of API function - - skipfooter = kwds.pop('skipfooter', None) - if skipfooter is not None: - skip_footer = skipfooter - - return self._parse_excel(sheetname, header=header, - skiprows=skiprows, index_col=index_col, - has_index_names=has_index_names, - parse_cols=parse_cols, - parse_dates=parse_dates, - date_parser=date_parser, - na_values=na_values, - thousands=thousands, - chunksize=chunksize, - skip_footer=skip_footer) - - def _should_parse(self, i, parse_cols): - - def _range2cols(areas): - """ - Convert comma separated list of column names and column ranges to a - list of 0-based column indexes. - - >>> _range2cols('A:E') - [0, 1, 2, 3, 4] - >>> _range2cols('A,C,Z:AB') - [0, 2, 25, 26, 27] - """ - def _excel2num(x): - "Convert Excel column name like 'AB' to 0-based column index" - return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, x.upper().strip(), 0) - 1 - - cols = [] - for rng in areas.split(','): - if ':' in rng: - rng = rng.split(':') - cols += range(_excel2num(rng[0]), _excel2num(rng[1]) + 1) - else: - cols.append(_excel2num(rng)) - return cols - - if isinstance(parse_cols, int): - return i <= parse_cols - elif isinstance(parse_cols, basestring): - return i in _range2cols(parse_cols) - else: - return i in parse_cols - - def _parse_excel(self, sheetname, header=0, skiprows=None, - skip_footer=0, index_col=None, has_index_names=None, - parse_cols=None, parse_dates=False, date_parser=None, - na_values=None, thousands=None, chunksize=None): - from xlrd import (xldate_as_tuple, XL_CELL_DATE, - XL_CELL_ERROR, XL_CELL_BOOLEAN) - - datemode = self.book.datemode - sheet = self.book.sheet_by_name(sheetname) - - data 
= [] - should_parse = {} - for i in range(sheet.nrows): - row = [] - for j, (value, typ) in enumerate(izip(sheet.row_values(i), - sheet.row_types(i))): - if parse_cols is not None and j not in should_parse: - should_parse[j] = self._should_parse(j, parse_cols) - - if parse_cols is None or should_parse[j]: - if typ == XL_CELL_DATE: - dt = xldate_as_tuple(value, datemode) - # how to produce this first case? - if dt[0] < datetime.MINYEAR: # pragma: no cover - value = datetime.time(*dt[3:]) - else: - value = datetime.datetime(*dt) - elif typ == XL_CELL_ERROR: - value = np.nan - elif typ == XL_CELL_BOOLEAN: - value = bool(value) - row.append(value) - - data.append(row) - - if header is not None: - data[header] = _trim_excel_header(data[header]) - - parser = TextParser(data, header=header, index_col=index_col, - has_index_names=has_index_names, - na_values=na_values, - thousands=thousands, - parse_dates=parse_dates, - date_parser=date_parser, - skiprows=skiprows, - skip_footer=skip_footer, - chunksize=chunksize) - - return parser.read() - - @property - def sheet_names(self): - return self.book.sheet_names() - - -def _trim_excel_header(row): - # trim header row so auto-index inference works - # xlrd uses '' , openpyxl None - while len(row) > 0 and (row[0] == '' or row[0] is None): - row = row[1:] - return row - - -class CellStyleConverter(object): - """ - Utility Class which converts a style dict to xlrd or openpyxl style - """ - - @staticmethod - def to_xls(style_dict, num_format_str=None): - """ - converts a style_dict to an xlwt style object - Parameters - ---------- - style_dict: style dictionary to convert - """ - import xlwt - - def style_to_xlwt(item, firstlevel=True, field_sep=',', line_sep=';'): - """helper wich recursively generate an xlwt easy style string - for example: - - hstyle = {"font": {"bold": True}, - "border": {"top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin"}, - "align": {"horiz": "center"}} - will be converted to - font: bold 
on; \ - border: top thin, right thin, bottom thin, left thin; \ - align: horiz center; - """ - if hasattr(item, 'items'): - if firstlevel: - it = ["%s: %s" % (key, style_to_xlwt(value, False)) - for key, value in item.items()] - out = "%s " % (line_sep).join(it) - return out - else: - it = ["%s %s" % (key, style_to_xlwt(value, False)) - for key, value in item.items()] - out = "%s " % (field_sep).join(it) - return out - else: - item = "%s" % item - item = item.replace("True", "on") - item = item.replace("False", "off") - return item - - if style_dict: - xlwt_stylestr = style_to_xlwt(style_dict) - style = xlwt.easyxf(xlwt_stylestr, field_sep=',', line_sep=';') - else: - style = xlwt.XFStyle() - if num_format_str is not None: - style.num_format_str = num_format_str - - return style - - @staticmethod - def to_xlsx(style_dict): - """ - converts a style_dict to an openpyxl style object - Parameters - ---------- - style_dict: style dictionary to convert - """ - - from openpyxl.style import Style - xls_style = Style() - for key, value in style_dict.items(): - for nk, nv in value.items(): - if key == "borders": - (xls_style.borders.__getattribute__(nk) - .__setattr__('border_style', nv)) - else: - xls_style.__getattribute__(key).__setattr__(nk, nv) - - return xls_style - - -def _conv_value(val): - # convert value for excel dump - if isinstance(val, np.int64): - val = int(val) - elif isinstance(val, np.bool8): - val = bool(val) - elif isinstance(val, Period): - val = "%s" % val - - return val - - -class ExcelWriter(object): - """ - Class for writing DataFrame objects into excel sheets, uses xlwt for xls, - openpyxl for xlsx. See DataFrame.to_excel for typical usage. 
- - Parameters - ---------- - path : string - Path to xls file - """ +from pandas.io import excel +class ExcelWriter(excel.ExcelWriter): def __init__(self, path): - self.use_xlsx = True - if path.endswith('.xls'): - self.use_xlsx = False - import xlwt - self.book = xlwt.Workbook() - self.fm_datetime = xlwt.easyxf( - num_format_str='YYYY-MM-DD HH:MM:SS') - self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') - else: - from openpyxl.workbook import Workbook - self.book = Workbook() # optimized_write=True) - # open pyxl 1.6.1 adds a dummy sheet remove it - if self.book.worksheets: - self.book.remove_sheet(self.book.worksheets[0]) - self.path = path - self.sheets = {} - self.cur_sheet = None - - def save(self): - """ - Save workbook to disk - """ - self.book.save(self.path) + from warnings import warn + warn("ExcelWriter can now be imported from: pandas.io.excel", FutureWarning) + super(ExcelWriter, self).__init__(path) - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): - """ - Write given formated cells into Excel an excel sheet - - Parameters - ---------- - cells : generator - cell of formated data to save to Excel sheet - sheet_name : string, default None - Name of Excel sheet, if None, then use self.cur_sheet - startrow: upper left cell row to dump data frame - startcol: upper left cell column to dump data frame - """ - if sheet_name is None: - sheet_name = self.cur_sheet - if sheet_name is None: # pragma: no cover - raise Exception('Must pass explicit sheet_name or set ' - 'cur_sheet property') - if self.use_xlsx: - self._writecells_xlsx(cells, sheet_name, startrow, startcol) - else: - self._writecells_xls(cells, sheet_name, startrow, startcol) - - def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): - - from openpyxl.cell import get_column_letter - - if sheet_name in self.sheets: - wks = self.sheets[sheet_name] - else: - wks = self.book.create_sheet() - wks.title = sheet_name - self.sheets[sheet_name] = wks - - for cell in 
cells: - colletter = get_column_letter(startcol + cell.col + 1) - xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1)) - xcell.value = _conv_value(cell.val) - if cell.style: - style = CellStyleConverter.to_xlsx(cell.style) - for field in style.__fields__: - xcell.style.__setattr__(field, - style.__getattribute__(field)) - - if isinstance(cell.val, datetime.datetime): - xcell.style.number_format.format_code = "YYYY-MM-DD HH:MM:SS" - elif isinstance(cell.val, datetime.date): - xcell.style.number_format.format_code = "YYYY-MM-DD" - - # merging requires openpyxl latest (works on 1.6.1) - # todo add version check - if cell.mergestart is not None and cell.mergeend is not None: - cletterstart = get_column_letter(startcol + cell.col + 1) - cletterend = get_column_letter(startcol + cell.mergeend + 1) - - wks.merge_cells('%s%s:%s%s' % (cletterstart, - startrow + cell.row + 1, - cletterend, - startrow + cell.mergestart + 1)) - - def _writecells_xls(self, cells, sheet_name, startrow, startcol): - if sheet_name in self.sheets: - wks = self.sheets[sheet_name] - else: - wks = self.book.add_sheet(sheet_name) - self.sheets[sheet_name] = wks - - style_dict = {} - - for cell in cells: - val = _conv_value(cell.val) - - num_format_str = None - if isinstance(cell.val, datetime.datetime): - num_format_str = "YYYY-MM-DD HH:MM:SS" - if isinstance(cell.val, datetime.date): - num_format_str = "YYYY-MM-DD" - - stylekey = json.dumps(cell.style) - if num_format_str: - stylekey += num_format_str +class ExcelFile(excel.ExcelFile): + def __init__(self, path_or_buf, kind=None, **kwds): + from warnings import warn + warn("ExcelFile can now be imported from: pandas.io.excel", FutureWarning) + super(ExcelFile, self).__init__(path_or_buf, kind=kind, **kwds) - if stylekey in style_dict: - style = style_dict[stylekey] - else: - style = CellStyleConverter.to_xls(cell.style, num_format_str) - style_dict[stylekey] = style - - if cell.mergestart is not None and cell.mergeend is not None: - 
wks.write_merge(startrow + cell.row, - startrow + cell.mergestart, - startcol + cell.col, - startcol + cell.mergeend, - val, style) - else: - wks.write(startrow + cell.row, - startcol + cell.col, - val, style) + diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b54a30d95bb54..4a1cac8a60e30 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -167,7 +167,7 @@ def read_frame(sql, con, index_col=None, coerce_float=True, params=None): return result frame_query = read_frame - +read_sql = read_frame def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): """ diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 3fc246c2ffbc7..f1257f505ca9b 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -37,10 +37,8 @@ def read_stata(filepath_or_buffer, convert_dates=True, convert_categoricals=True return reader.data(convert_dates, convert_categoricals, index) - _date_formats = ["%tc", "%tC", "%td", "%tw", "%tm", "%tq", "%th", "%ty"] - def _stata_elapsed_date_to_datetime(date, fmt): """ Convert from SIF to datetime. 
http://www.stata.com/help.cgi?datetime diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py index 0c5b168ee8de5..23503f74f25f2 100644 --- a/pandas/io/tests/test_cparser.py +++ b/pandas/io/tests/test_cparser.py @@ -18,7 +18,7 @@ from pandas import DataFrame, Series, Index, isnull, MultiIndex import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, - ExcelFile, TextParser) + TextParser) from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network) import pandas.lib as lib diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 9396581f74326..396912c0f5f54 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -15,7 +15,7 @@ from pandas import DataFrame, Series, Index, isnull import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, - ExcelFile, TextParser) + TextParser) from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network) import pandas.lib as lib diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 8a145517d3b5a..00a695f3013cd 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -17,7 +17,8 @@ from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, - ExcelFile, TextFileReader, TextParser) + TextParser, TextFileReader) +from pandas.io.excel import ExcelFile, ExcelWriter, read_excel from pandas.util.testing import (assert_almost_equal, assert_series_equal, network, @@ -35,9 +36,6 @@ from pandas._parser import OverflowError -from pandas.io.parsers import (ExcelFile, ExcelWriter, read_csv) - - def _skip_if_no_xlrd(): try: import xlrd @@ -275,19 +273,16 @@ def _check_extension(self, ext): # test roundtrip 
self.frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0) + recons = read_excel(path, 'test1', index_col=0) tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(path, 'test1', index=False) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=None) + recons = read_excel(path, 'test1', index_col=None) recons.index = self.frame.index tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(path, 'test1', na_rep='NA') - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0, na_values=['NA']) + recons = read_excel(path, 'test1', index_col=0, na_values=['NA']) tm.assert_frame_equal(self.frame, recons) def test_excel_roundtrip_xls_mixed(self): @@ -668,7 +663,7 @@ def test_to_excel_unicode_filename(self): tm.assert_frame_equal(rs, xp) def test_to_excel_styleconverter(self): - from pandas.io.parsers import CellStyleConverter + from pandas.io.excel import CellStyleConverter try: import xlwt @@ -859,6 +854,23 @@ def roundtrip(df, header=True, parser_hdr=0): self.assertEqual(res.shape, (1, 2)) self.assertTrue(res.ix[0, 0] is not np.nan) + def test_deprecated_from_parsers(self): + + # since 0.11.1 changed the import path + import warnings + + with warnings.catch_warnings() as w: + warnings.filterwarnings(action='ignore', category=FutureWarning) + + _skip_if_no_xlrd() + from pandas.io.parsers import ExcelFile as xf + xf(self.xls1) + + _skip_if_no_xlwt() + with ensure_clean('test.xls') as path: + from pandas.io.parsers import ExcelWriter as xw + xw(path) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 51062b2ab706f..9f5d796763fb0 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -130,23 +130,21 @@ def test_read_dta4(self): def test_write_dta5(self): original = DataFrame([(np.nan, np.nan, np.nan, np.nan, 
np.nan)], columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss']) + original.index.name = 'index' with ensure_clean(self.dta5) as path: - writer = StataWriter(path, original, None, False) - writer.write_file() - + original.to_stata(path, None, False) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again, original) + tm.assert_frame_equal(written_and_read_again.set_index('index'), original) def test_write_dta6(self): original = self.read_csv(self.csv3) + original.index.name = 'index' with ensure_clean(self.dta6) as path: - writer = StataWriter(path, original, None, False) - writer.write_file() - + original.to_stata(path, None, False) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index('index'), original) @nose.tools.nottest def test_read_dta7(self): @@ -184,6 +182,10 @@ def test_read_dta9(self): decimal=3 ) + def test_stata_doc_examples(self): + with ensure_clean(self.dta5) as path: + df = DataFrame(np.random.randn(10,2),columns=list('AB')) + df.to_stata(path) if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 3640025bbf95c..58b7ac272401f 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1367,7 +1367,7 @@ def test_to_excel(self): import xlwt import xlrd import openpyxl - from pandas.io.parsers import ExcelFile + from pandas.io.excel import ExcelFile except ImportError: raise nose.SkipTest diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 5981640b4159c..a2e08bc744ab0 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -15,7 +15,6 @@ import pandas.core.common as com import pandas.core.panel as panelmod from pandas.util import py3compat -from pandas.io.parsers import (ExcelFile, ExcelWriter) from 
pandas.util.testing import (assert_panel_equal, assert_panel4d_equal,