From c80b04543316dc60365b1b993bb90c72c85e27ee Mon Sep 17 00:00:00 2001 From: Ka Wo Chen Date: Sun, 10 Jan 2016 18:01:00 -0500 Subject: [PATCH 01/10] BUG: GH11880 where __contains__ fails in unpacked DataFrame with object columns --- doc/source/whatsnew/v0.18.0.txt | 1 + pandas/core/window.py | 1 + pandas/hashtable.pyx | 4 +- pandas/io/tests/test_packers.py | 74 ++++++++++++++++++++++++++++++--- 4 files changed, 72 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 4ce2ce5b69cb4..3496e9eea834c 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -463,6 +463,7 @@ Bug Fixes - Bug in ``pd.read_clipboard`` and ``pd.to_clipboard`` functions not supporting Unicode; upgrade included ``pyperclip`` to v1.5.15 (:issue:`9263`) - Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`) +- Bug in ``from_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame``, if the ``DataFrame`` has object columns. 
(:issue:`11880`) - Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`) diff --git a/pandas/core/window.py b/pandas/core/window.py index 1e5816e898baa..ce8fda9e932bc 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -965,6 +965,7 @@ def corr(self, other=None, pairwise=None, **kwargs): Use a standard estimation bias correction """ + class EWM(_Rolling): r""" Provides exponential weighted functions diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx index 58e9d64921e0d..a5fcbd3f2d0f1 100644 --- a/pandas/hashtable.pyx +++ b/pandas/hashtable.pyx @@ -342,7 +342,7 @@ cdef class Int64HashTable(HashTable): self.table.vals[k] = values[i] @cython.boundscheck(False) - def map_locations(self, int64_t[:] values): + def map_locations(self, ndarray[int64_t, ndim=1] values): cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -570,7 +570,7 @@ cdef class Float64HashTable(HashTable): return np.asarray(labels) @cython.boundscheck(False) - def map_locations(self, float64_t[:] values): + def map_locations(self, ndarray[float64_t, ndim=1] values): cdef: Py_ssize_t i, n = len(values) int ret = 0 diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index d6a9feb1bd8f4..61b24c858b60d 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -9,8 +9,8 @@ from pandas import compat from pandas.compat import u from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, - date_range, period_range, Index, SparseSeries, SparseDataFrame, - SparsePanel) + date_range, period_range, Index) +from pandas.io.packers import to_msgpack, read_msgpack import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, assert_index_equal, assert_series_equal, @@ -23,7 +23,19 @@ nan = np.nan -from pandas.io.packers import to_msgpack, read_msgpack +try: + import blosc # NOQA +except ImportError: + _BLOSC_INSTALLED = False +else: + _BLOSC_INSTALLED = True + +try: + import 
zlib # NOQA +except ImportError: + _ZLIB_INSTALLED = False +else: + _ZLIB_INSTALLED = True _multiprocess_can_split_ = False @@ -483,6 +495,14 @@ class TestCompression(TestPackers): """ def setUp(self): + try: + from sqlalchemy import create_engine + self._create_sql_engine = create_engine + except ImportError: + self._SQLALCHEMY_INSTALLED = False + else: + self._SQLALCHEMY_INSTALLED = True + super(TestCompression, self).setUp() data = { 'A': np.arange(1000, dtype=np.float64), @@ -508,14 +528,56 @@ def test_compression_zlib(self): assert_frame_equal(self.frame[k], i_rec[k]) def test_compression_blosc(self): - try: - import blosc - except ImportError: + if not _BLOSC_INSTALLED: raise nose.SkipTest('no blosc') i_rec = self.encode_decode(self.frame, compress='blosc') for k in self.frame.keys(): assert_frame_equal(self.frame[k], i_rec[k]) + def test_readonly_axis_blosc(self): + # GH11880 + if not _BLOSC_INSTALLED: + raise nose.SkipTest('no blosc') + df1 = DataFrame({'A': list('abcd')}) + df2 = DataFrame(df1, index=[1., 2., 3., 4.]) + self.assertTrue(1 in self.encode_decode(df1['A'], compress='blosc')) + self.assertTrue(1. in self.encode_decode(df2['A'], compress='blosc')) + + def test_readonly_axis_zlib(self): + # GH11880 + df1 = DataFrame({'A': list('abcd')}) + df2 = DataFrame(df1, index=[1., 2., 3., 4.]) + self.assertTrue(1 in self.encode_decode(df1['A'], compress='zlib')) + self.assertTrue(1. 
in self.encode_decode(df2['A'], compress='zlib')) + + def test_readonly_axis_blosc_to_sql(self): + # GH11880 + if not _BLOSC_INSTALLED: + raise nose.SkipTest('no blosc') + if not self._SQLALCHEMY_INSTALLED: + raise nose.SkipTest('no sqlalchemy') + expected = DataFrame({'A': list('abcd')}) + df = self.encode_decode(expected, compress='blosc') + eng = self._create_sql_engine("sqlite:///:memory:") + df.to_sql('test', eng, if_exists='append') + result = pandas.read_sql_table('test', eng, index_col='index') + result.index.names = [None] + assert_frame_equal(expected, result) + + def test_readonly_axis_zlib_to_sql(self): + # GH11880 + if not _ZLIB_INSTALLED: + raise nose.SkipTest('no zlib') + if not self._SQLALCHEMY_INSTALLED: + raise nose.SkipTest('no sqlalchemy') + expected = DataFrame({'A': list('abcd')}) + df = self.encode_decode(expected, compress='zlib') + eng = self._create_sql_engine("sqlite:///:memory:") + df.to_sql('test', eng, if_exists='append') + result = pandas.read_sql_table('test', eng, index_col='index') + result.index.names = [None] + assert_frame_equal(expected, result) + class TestEncoding(TestPackers): def setUp(self): From 0dfb0b9723efdb7876c9fa0d7c09a57adb90f576 Mon Sep 17 00:00:00 2001 From: Nicolas Bonnotte Date: Thu, 14 Jan 2016 17:29:04 +0100 Subject: [PATCH 02/10] ENH in .to_latex() support for utf-8 encoding in Python 2, #7061 --- doc/source/whatsnew/v0.18.0.txt | 3 + pandas/core/format.py | 217 ++++++++++++++++++-------------- pandas/core/frame.py | 8 +- pandas/tests/test_format.py | 20 +++ 4 files changed, 153 insertions(+), 95 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 3496e9eea834c..44c4d27895ae6 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -303,6 +303,9 @@ Other API Changes - ``.memory_usage`` now includes values in the index, as does memory_usage in ``.info`` (:issue:`11597`) +- ``DataFrame.to_latex()`` now supports non-ascii encodings (eg 
utf-8) in Python 2 with the parameter ``encoding`` (:issue:`7061`) + + Changes to eval ^^^^^^^^^^^^^^^ diff --git a/pandas/core/format.py b/pandas/core/format.py index 86d39c139fb51..a50edd9462431 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -619,105 +619,20 @@ def _join_multiline(self, *strcols): st = ed return '\n\n'.join(str_lst) - def to_latex(self, column_format=None, longtable=False): + def to_latex(self, column_format=None, longtable=False, encoding=None): """ Render a DataFrame to a LaTeX tabular/longtable environment output. """ - self.escape = self.kwds.get('escape', True) - def get_col_type(dtype): - if issubclass(dtype.type, np.number): - return 'r' - else: - return 'l' - - frame = self.frame - - if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u('Empty %s\nColumns: %s\nIndex: %s') - % (type(self.frame).__name__, - frame.columns, frame.index)) - strcols = [[info_line]] - else: - strcols = self._to_str_columns() - - if self.index and isinstance(self.frame.index, MultiIndex): - clevels = self.frame.columns.nlevels - strcols.pop(0) - name = any(self.frame.index.names) - for i, lev in enumerate(self.frame.index.levels): - lev2 = lev.format() - blank = ' ' * len(lev2[0]) - lev3 = [blank] * clevels - if name: - lev3.append(lev.name) - for level_idx, group in itertools.groupby( - self.frame.index.labels[i]): - count = len(list(group)) - lev3.extend([lev2[level_idx]] + [blank] * (count - 1)) - strcols.insert(i, lev3) - - if column_format is None: - dtypes = self.frame.dtypes._values - column_format = ''.join(map(get_col_type, dtypes)) - if self.index: - index_format = 'l' * self.frame.index.nlevels - column_format = index_format + column_format - elif not isinstance(column_format, - compat.string_types): # pragma: no cover - raise AssertionError('column_format must be str or unicode, not %s' - % type(column_format)) - - def write(buf, frame, column_format, strcols, longtable=False): - if not longtable: - 
buf.write('\\begin{tabular}{%s}\n' % column_format) - buf.write('\\toprule\n') - else: - buf.write('\\begin{longtable}{%s}\n' % column_format) - buf.write('\\toprule\n') - - nlevels = frame.columns.nlevels - if any(frame.index.names): - nlevels += 1 - for i, row in enumerate(zip(*strcols)): - if i == nlevels and self.header: - buf.write('\\midrule\n') # End of header - if longtable: - buf.write('\\endhead\n') - buf.write('\\midrule\n') - buf.write('\\multicolumn{3}{r}{{Continued on next ' - 'page}} \\\\\n') - buf.write('\midrule\n') - buf.write('\endfoot\n\n') - buf.write('\\bottomrule\n') - buf.write('\\endlastfoot\n') - if self.escape: - crow = [(x.replace('\\', '\\textbackslash') # escape backslashes first - .replace('_', '\\_') - .replace('%', '\\%') - .replace('$', '\\$') - .replace('#', '\\#') - .replace('{', '\\{') - .replace('}', '\\}') - .replace('~', '\\textasciitilde') - .replace('^', '\\textasciicircum') - .replace('&', '\\&') if x else '{}') for x in row] - else: - crow = [x if x else '{}' for x in row] - buf.write(' & '.join(crow)) - buf.write(' \\\\\n') - - if not longtable: - buf.write('\\bottomrule\n') - buf.write('\\end{tabular}\n') - else: - buf.write('\\end{longtable}\n') + latex_renderer = LatexFormatter(self, column_format=column_format, + longtable=longtable) if hasattr(self.buf, 'write'): - write(self.buf, frame, column_format, strcols, longtable) + latex_renderer.write_result(self.buf) elif isinstance(self.buf, compat.string_types): - with open(self.buf, 'w') as f: - write(f, frame, column_format, strcols, longtable) + import codecs + with codecs.open(self.buf, 'w', encoding=encoding) as f: + latex_renderer.write_result(f) else: raise TypeError('buf is not a file name and it has no write ' 'method') @@ -851,6 +766,124 @@ def _get_column_name_list(self): return names +class LatexFormatter(TableFormatter): + """ Used to render a DataFrame to a LaTeX tabular/longtable environment + output. 
+ + Parameters + ---------- + formatter : `DataFrameFormatter` + column_format : str, default None + The columns format as specified in `LaTeX table format + <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3 columns + longtable : boolean, default False + Use a longtable environment instead of tabular. + + See also + -------- + HTMLFormatter + """ + + def __init__(self, formatter, column_format=None, longtable=False): + self.fmt = formatter + self.frame = self.fmt.frame + self.column_format = column_format + self.longtable = longtable + + def write_result(self, buf): + """ + Render a DataFrame to a LaTeX tabular/longtable environment output. + """ + + # string representation of the columns + if len(self.frame.columns) == 0 or len(self.frame.index) == 0: + info_line = (u('Empty %s\nColumns: %s\nIndex: %s') + % (type(self.frame).__name__, + self.frame.columns, self.frame.index)) + strcols = [[info_line]] + else: + strcols = self.fmt._to_str_columns() + + def get_col_type(dtype): + if issubclass(dtype.type, np.number): + return 'r' + else: + return 'l' + + if self.fmt.index and isinstance(self.frame.index, MultiIndex): + clevels = self.frame.columns.nlevels + strcols.pop(0) + name = any(self.frame.index.names) + for i, lev in enumerate(self.frame.index.levels): + lev2 = lev.format() + blank = ' ' * len(lev2[0]) + lev3 = [blank] * clevels + if name: + lev3.append(lev.name) + for level_idx, group in itertools.groupby( + self.frame.index.labels[i]): + count = len(list(group)) + lev3.extend([lev2[level_idx]] + [blank] * (count - 1)) + strcols.insert(i, lev3) + + column_format = self.column_format + if column_format is None: + dtypes = self.frame.dtypes._values + column_format = ''.join(map(get_col_type, dtypes)) + if self.fmt.index: + index_format = 'l' * self.frame.index.nlevels + column_format = index_format + column_format + elif not isinstance(column_format, + compat.string_types): # pragma: no cover + raise AssertionError('column_format must be str or unicode, not %s' + % type(column_format)) + + 
if not self.longtable: + buf.write('\\begin{tabular}{%s}\n' % column_format) + buf.write('\\toprule\n') + else: + buf.write('\\begin{longtable}{%s}\n' % column_format) + buf.write('\\toprule\n') + + nlevels = self.frame.columns.nlevels + if any(self.frame.index.names): + nlevels += 1 + for i, row in enumerate(zip(*strcols)): + if i == nlevels and self.fmt.header: + buf.write('\\midrule\n') # End of header + if self.longtable: + buf.write('\\endhead\n') + buf.write('\\midrule\n') + buf.write('\\multicolumn{3}{r}{{Continued on next ' + 'page}} \\\\\n') + buf.write('\\midrule\n') + buf.write('\\endfoot\n\n') + buf.write('\\bottomrule\n') + buf.write('\\endlastfoot\n') + if self.fmt.kwds.get('escape', True): + # escape backslashes first + crow = [(x.replace('\\', '\\textbackslash') + .replace('_', '\\_') + .replace('%', '\\%') + .replace('$', '\\$') + .replace('#', '\\#') + .replace('{', '\\{') + .replace('}', '\\}') + .replace('~', '\\textasciitilde') + .replace('^', '\\textasciicircum') + .replace('&', '\\&') if x else '{}') for x in row] + else: + crow = [x if x else '{}' for x in row] + buf.write(' & '.join(crow)) + buf.write(' \\\\\n') + + if not self.longtable: + buf.write('\\bottomrule\n') + buf.write('\\end{tabular}\n') + else: + buf.write('\\end{longtable}\n') + + class HTMLFormatter(TableFormatter): indent_delta = 2 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7220b25daf318..b27c4268796dd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1547,7 +1547,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, bold_rows=True, column_format=None, - longtable=None, escape=None): + longtable=None, escape=None, encoding=None): """ Render a DataFrame to a tabular environment table. You can splice this into a LaTeX document. Requires \\usepackage{booktabs}. 
@@ -1567,7 +1567,8 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, default: True When set to False prevents from escaping latex special characters in column names. - + encoding : str, default None + Default encoding is ascii in Python 2 and utf-8 in Python 3 """ if colSpace is not None: # pragma: no cover @@ -1589,7 +1590,8 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, sparsify=sparsify, index_names=index_names, escape=escape) - formatter.to_latex(column_format=column_format, longtable=longtable) + formatter.to_latex(column_format=column_format, longtable=longtable, + encoding=encoding) if buf is None: return formatter.buf.getvalue() diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 4d17610d87bea..a73b459459321 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -15,6 +15,8 @@ from numpy.random import randn import numpy as np +import codecs + div_style = '' try: import IPython @@ -2554,6 +2556,24 @@ def test_to_latex_filename(self): with open(path, 'r') as f: self.assertEqual(self.frame.to_latex(), f.read()) + # test with utf-8 and encoding option (GH 7061) + df = DataFrame([[u'au\xdfgangen']]) + with tm.ensure_clean('test.tex') as path: + df.to_latex(path, encoding='utf-8') + with codecs.open(path, 'r', encoding='utf-8') as f: + self.assertEqual(df.to_latex(), f.read()) + + # test with utf-8 without encoding option + if compat.PY3: # python3 default encoding is utf-8 + with tm.ensure_clean('test.tex') as path: + df.to_latex(path) + with codecs.open(path, 'r') as f: + self.assertEqual(df.to_latex(), f.read()) + else: + # python2 default encoding is ascii, so an error should be raised + with tm.ensure_clean('test.tex') as path: + self.assertRaises(UnicodeEncodeError, df.to_latex, path) + def test_to_latex(self): # it works! 
self.frame.to_latex() From 5e3bec055c047860ba558dc3d391ec33a7cfd95b Mon Sep 17 00:00:00 2001 From: RahulHP Date: Fri, 15 Jan 2016 22:37:11 +0530 Subject: [PATCH 03/10] ENH : Allow to_sql to recognize single sql type #11886 --- pandas/io/sql.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 95a6d02b1ccb6..afe650e65f588 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -564,6 +564,10 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail', raise NotImplementedError("'frame' argument should be either a " "Series or a DataFrame") + if dtype and not isinstance(dtype,dict): + temp_type = dtype + dtype = { col_name : temp_type for col_name in frame } + pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index, index_label=index_label, schema=schema, chunksize=chunksize, dtype=dtype) From 44147b4d1f223bc9e2536bb5e4a5ea360b059b9b Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 16 Jan 2016 11:48:51 +0530 Subject: [PATCH 04/10] added is_dictlike --- pandas/io/sql.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index afe650e65f588..e5a0ede93aaab 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -19,6 +19,7 @@ from pandas.core.common import isnull from pandas.core.base import PandasObject from pandas.core.dtypes import DatetimeTZDtype +from pandas.core.generic import is_dictlike from pandas.tseries.tools import to_datetime from pandas.util.decorators import Appender @@ -564,7 +565,7 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail', raise NotImplementedError("'frame' argument should be either a " "Series or a DataFrame") - if dtype and not isinstance(dtype,dict): + if dtype and not is_dictlike(dtype): temp_type = dtype dtype = { col_name : temp_type for col_name in frame } From 9b0e479f246500be959557d8af31ea41e983bb67 Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 16 Jan 2016 11:51:07 +0530 
Subject: [PATCH 05/10] Added helper functions test_to_sql_single_dtype --- pandas/io/tests/test_sql.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index bfd1ac3f08ee8..eb42e7050d405 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1165,6 +1165,9 @@ def test_to_sql(self): def test_to_sql_empty(self): self._to_sql_empty() + def test_to_sql_single_dtype(self): + self.to_sql_single_dtype() + def test_to_sql_fail(self): self._to_sql_fail() @@ -1878,6 +1881,9 @@ def test_to_sql(self): def test_to_sql_empty(self): self._to_sql_empty() + def test_to_sql_single_dtype(self): + self.to_sql_single_dtype() + def test_to_sql_fail(self): self._to_sql_fail() From d67011cd7f7a569cf44cf04cd86456b4efb49a31 Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 16 Jan 2016 12:59:20 +0530 Subject: [PATCH 06/10] Tests added --- pandas/io/sql.py | 10 +++++++--- pandas/io/tests/test_sql.py | 14 +++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e5a0ede93aaab..1321b07731cd6 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -565,9 +565,6 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail', raise NotImplementedError("'frame' argument should be either a " "Series or a DataFrame") - if dtype and not is_dictlike(dtype): - temp_type = dtype - dtype = { col_name : temp_type for col_name in frame } pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index, index_label=index_label, schema=schema, @@ -1232,6 +1229,9 @@ def to_sql(self, frame, name, if_exists='fail', index=True, be a SQLAlchemy type. 
""" + if dtype and not is_dictlike(dtype): + temp_type = dtype + dtype = { col_name : temp_type for col_name in frame } if dtype is not None: from sqlalchemy.types import to_instance, TypeEngine for col, my_type in dtype.items(): @@ -1628,6 +1628,10 @@ def to_sql(self, frame, name, if_exists='fail', index=True, be a string. """ + if dtype and not is_dictlike(dtype): + temp_type = dtype + dtype = { col_name : temp_type for col_name in frame } + if dtype is not None: for col, my_type in dtype.items(): if not isinstance(my_type, str): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index eb42e7050d405..dd14b4751fbcc 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -367,6 +367,14 @@ def _to_sql_empty(self): self.drop_table('test_frame1') self.pandasSQL.to_sql(self.test_frame1.iloc[:0], 'test_frame1') + def _to_sql_single_dtype(self,dtype): + self.drop_table('test_frame1') + self.pandasSQL.to_sql(self.test_frame1[['A','B']],'test_frame1',dtype=dtype) + self.assertTrue(self.pandasSQL.has_table( + 'test_frame1'), 'Table not written to DB') + + self.drop_table('test_frame1') + def _to_sql_fail(self): self.drop_table('test_frame1') @@ -1166,8 +1174,8 @@ def test_to_sql_empty(self): self._to_sql_empty() def test_to_sql_single_dtype(self): - self.to_sql_single_dtype() - + self._to_sql_single_dtype(dtype=sqltypes.NVARCHAR) + def test_to_sql_fail(self): self._to_sql_fail() @@ -1882,7 +1890,7 @@ def test_to_sql_empty(self): self._to_sql_empty() def test_to_sql_single_dtype(self): - self.to_sql_single_dtype() + self._to_sql_single_dtype(dtype='float64') def test_to_sql_fail(self): self._to_sql_fail() From 8690116fd1569a4bb6a0acdb53fd27b289c6874e Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 16 Jan 2016 13:23:04 +0530 Subject: [PATCH 07/10] Tests and whatsnew updated --- doc/source/whatsnew/v0.18.0.txt | 1 + pandas/io/sql.py | 15 ++++++++++----- pandas/io/tests/test_sql.py | 2 +- 3 files changed, 12 insertions(+), 6 
deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 44c4d27895ae6..081e84c57c0ac 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -118,6 +118,7 @@ Other enhancements - ``Series`` gained an ``is_unique`` attribute (:issue:`11946`) - ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`). - ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`) +- ``DataFrame.to_sql`` now allows a single value as the SQL type for all columns (:issue:`11886`). .. _whatsnew_0180.enhancements.rounding: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 1321b07731cd6..0e910842f631c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -549,9 +549,11 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail', chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. + If all columns are of the same type, one single value can be + used. """ if if_exists not in ('fail', 'replace', 'append'): @@ -1224,9 +1226,11 @@ def to_sql(self, frame, name, if_exists='fail', index=True, chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type. + be a SQLAlchemy type.If all columns are of the same type, one + single value can be used. 
+ """ if dtype and not is_dictlike(dtype): @@ -1623,9 +1627,10 @@ def to_sql(self, frame, name, if_exists='fail', index=True, chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should - be a string. + be a string. If all columns are of the same type, one single + value can be used. """ if dtype and not is_dictlike(dtype): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index dd14b4751fbcc..d6168aec97c65 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1174,7 +1174,7 @@ def test_to_sql_empty(self): self._to_sql_empty() def test_to_sql_single_dtype(self): - self._to_sql_single_dtype(dtype=sqltypes.NVARCHAR) + self._to_sql_single_dtype(dtype=sqltypes.Float) def test_to_sql_fail(self): self._to_sql_fail() From 10912955756931139d81dc09653204e974a95c7b Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 16 Jan 2016 13:24:43 +0530 Subject: [PATCH 08/10] ENH: Allow single sql type --- pandas/io/sql.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0e910842f631c..464cae168be16 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -566,7 +566,6 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail', elif not isinstance(frame, DataFrame): raise NotImplementedError("'frame' argument should be either a " "Series or a DataFrame") - pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index, index_label=index_label, schema=schema, From a507b9f36560828775506836e9bfe45b5918b6b1 Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 16 Jan 2016 17:14:03 +0530 Subject: [PATCH 09/10] New tests written similar to test_dtype --- pandas/io/tests/test_sql.py | 34 
++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index d6168aec97c65..406a15ffcd1a2 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -367,14 +367,6 @@ def _to_sql_empty(self): self.drop_table('test_frame1') self.pandasSQL.to_sql(self.test_frame1.iloc[:0], 'test_frame1') - def _to_sql_single_dtype(self,dtype): - self.drop_table('test_frame1') - self.pandasSQL.to_sql(self.test_frame1[['A','B']],'test_frame1',dtype=dtype) - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') - - self.drop_table('test_frame1') - def _to_sql_fail(self): self.drop_table('test_frame1') @@ -1174,7 +1166,19 @@ def test_to_sql_empty(self): self._to_sql_empty() def test_to_sql_single_dtype(self): - self._to_sql_single_dtype(dtype=sqltypes.Float) + self.drop_table('single_dtype_test') + cols = ['A','B'] + data = [('a','b'), + ('c','d')] + df = DataFrame(data,columns=cols) + df.to_sql('single_dtype_test',self.conn,dtype=sqlalchemy.TEXT) + meta = sqlalchemy.schema.MetaData(bind=self.conn) + meta.reflect() + sqltypea = meta.tables['single_dtype_test'].columns['A'].type + sqltypeb = meta.tables['single_dtype_test'].columns['B'].type + self.assertTrue(isinstance(sqltypea, sqlalchemy.TEXT)) + self.assertTrue(isinstance(sqltypeb, sqlalchemy.TEXT)) + self.drop_table('single_dtype_test') def test_to_sql_fail(self): self._to_sql_fail() @@ -1890,7 +1894,17 @@ def test_to_sql_empty(self): self._to_sql_empty() def test_to_sql_single_dtype(self): - self._to_sql_single_dtype(dtype='float64') + if self.flavor == 'mysql': + raise nose.SkipTest('Not applicable to MySQL legacy') + self.drop_table('single_dtype_test') + cols = ['A','B'] + data = [('a','b'), + ('c','d')] + df = DataFrame(data,columns=cols) + df.to_sql('single_dtype_test',self.conn,dtype='STRING') + self.assertEqual(self._get_sqlite_column_type('single_dtype_test','A'),'STRING') + 
self.assertEqual(self._get_sqlite_column_type('single_dtype_test','B'),'STRING') + self.drop_table('single_dtype_test') def test_to_sql_fail(self): self._to_sql_fail() From 425bc7962e1bc11f4fce5b724d5ccc195b95a656 Mon Sep 17 00:00:00 2001 From: RahulHP Date: Mon, 18 Jan 2016 11:47:40 +0530 Subject: [PATCH 10/10] Minor re-arrangements in code --- pandas/io/sql.py | 10 +++---- pandas/io/tests/test_sql.py | 56 ++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 464cae168be16..8cf7e0eb15b48 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -551,7 +551,7 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail', time. If None, all rows will be written at once. dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type, or a string for sqlite3 fallback connection. + be a SQLAlchemy type, or a string for sqlite3 fallback connection. If all columns are of the same type, one single value can be used. @@ -1227,14 +1227,13 @@ def to_sql(self, frame, name, if_exists='fail', index=True, time. If None, all rows will be written at once. dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type.If all columns are of the same type, one + be a SQLAlchemy type. If all columns are of the same type, one single value can be used. 
""" if dtype and not is_dictlike(dtype): - temp_type = dtype - dtype = { col_name : temp_type for col_name in frame } + dtype = { col_name : dtype for col_name in frame } if dtype is not None: from sqlalchemy.types import to_instance, TypeEngine for col, my_type in dtype.items(): @@ -1633,8 +1632,7 @@ def to_sql(self, frame, name, if_exists='fail', index=True, """ if dtype and not is_dictlike(dtype): - temp_type = dtype - dtype = { col_name : temp_type for col_name in frame } + dtype = { col_name : dtype for col_name in frame } if dtype is not None: for col, my_type in dtype.items(): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 406a15ffcd1a2..909713d50a1ab 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1165,21 +1165,6 @@ def test_to_sql(self): def test_to_sql_empty(self): self._to_sql_empty() - def test_to_sql_single_dtype(self): - self.drop_table('single_dtype_test') - cols = ['A','B'] - data = [('a','b'), - ('c','d')] - df = DataFrame(data,columns=cols) - df.to_sql('single_dtype_test',self.conn,dtype=sqlalchemy.TEXT) - meta = sqlalchemy.schema.MetaData(bind=self.conn) - meta.reflect() - sqltypea = meta.tables['single_dtype_test'].columns['A'].type - sqltypeb = meta.tables['single_dtype_test'].columns['B'].type - self.assertTrue(isinstance(sqltypea, sqlalchemy.TEXT)) - self.assertTrue(isinstance(sqltypeb, sqlalchemy.TEXT)) - self.drop_table('single_dtype_test') - def test_to_sql_fail(self): self._to_sql_fail() @@ -1524,6 +1509,21 @@ def test_dtype(self): self.assertTrue(isinstance(sqltype, sqlalchemy.String)) self.assertEqual(sqltype.length, 10) + def test_to_sql_single_dtype(self): + self.drop_table('single_dtype_test') + cols = ['A','B'] + data = [('a','b'), + ('c','d')] + df = DataFrame(data,columns=cols) + df.to_sql('single_dtype_test',self.conn,dtype=sqlalchemy.TEXT) + meta = sqlalchemy.schema.MetaData(bind=self.conn) + meta.reflect() + sqltypea = 
meta.tables['single_dtype_test'].columns['A'].type + sqltypeb = meta.tables['single_dtype_test'].columns['B'].type + self.assertTrue(isinstance(sqltypea, sqlalchemy.TEXT)) + self.assertTrue(isinstance(sqltypeb, sqlalchemy.TEXT)) + self.drop_table('single_dtype_test') + def test_notnull_dtype(self): cols = {'Bool': Series([True,None]), 'Date': Series([datetime(2012, 5, 1), None]), @@ -1893,19 +1893,6 @@ def test_to_sql(self): def test_to_sql_empty(self): self._to_sql_empty() - def test_to_sql_single_dtype(self): - if self.flavor == 'mysql': - raise nose.SkipTest('Not applicable to MySQL legacy') - self.drop_table('single_dtype_test') - cols = ['A','B'] - data = [('a','b'), - ('c','d')] - df = DataFrame(data,columns=cols) - df.to_sql('single_dtype_test',self.conn,dtype='STRING') - self.assertEqual(self._get_sqlite_column_type('single_dtype_test','A'),'STRING') - self.assertEqual(self._get_sqlite_column_type('single_dtype_test','B'),'STRING') - self.drop_table('single_dtype_test') - def test_to_sql_fail(self): self._to_sql_fail() @@ -1995,6 +1982,19 @@ def test_dtype(self): self.assertRaises(ValueError, df.to_sql, 'error', self.conn, dtype={'B': bool}) + def test_to_sql_single_dtype(self): + if self.flavor == 'mysql': + raise nose.SkipTest('Not applicable to MySQL legacy') + self.drop_table('single_dtype_test') + cols = ['A','B'] + data = [('a','b'), + ('c','d')] + df = DataFrame(data,columns=cols) + df.to_sql('single_dtype_test',self.conn,dtype='STRING') + self.assertEqual(self._get_sqlite_column_type('single_dtype_test','A'),'STRING') + self.assertEqual(self._get_sqlite_column_type('single_dtype_test','B'),'STRING') + self.drop_table('single_dtype_test') + def test_notnull_dtype(self): if self.flavor == 'mysql': raise nose.SkipTest('Not applicable to MySQL legacy')