From b0372fa987e36c5fe165637c51ba234d89537e7e Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Thu, 17 Dec 2020 21:13:00 +0100 Subject: [PATCH 1/5] BUG: Map pandas integer to optimal SQLAlchemy integer type (GH35076) --- pandas/io/sql.py | 8 +++++++- pandas/tests/io/test_sql.py | 39 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 23f992ceb009a..87b646fa5173f 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1097,6 +1097,7 @@ def _sqlalchemy_type(self, col): DateTime, Float, Integer, + SmallInteger, Text, Time, ) @@ -1127,8 +1128,13 @@ def _sqlalchemy_type(self, col): else: return Float(precision=53) elif col_type == "integer": - if col.dtype == "int32": + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + if col.dtype.name.lower() in ("int8", "uint8", "int16"): + return SmallInteger + elif col.dtype.name.lower() in ("uint16", "int32"): return Integer + elif col.dtype.name.lower() == "uint64": + raise ValueError("Unsigned 64 bit integer datatype is not supported") else: return BigInteger elif col_type == "boolean": diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 497039de99196..e5b39717d273a 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1139,6 +1139,45 @@ def test_sqlalchemy_type_mapping(self): # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones assert isinstance(table.table.c["time"].type, sqltypes.TIMESTAMP) + @pytest.mark.parametrize( + "integer, expected", + [ + ("int8", "SMALLINT"), + ("Int8", "SMALLINT"), + ("uint8", "SMALLINT"), + ("UInt8", "SMALLINT"), + ("int16", "SMALLINT"), + ("Int16", "SMALLINT"), + ("uint16", "INTEGER"), + ("UInt16", "INTEGER"), + ("int32", "INTEGER"), + ("Int32", "INTEGER"), + ("uint32", "BIGINT"), + ("UInt32", "BIGINT"), + (int, "BIGINT"), + ("int64", "BIGINT"), + ("Int64", "BIGINT"), + ], + ) + def test_sqlalchemy_integer_mapping(self, integer, expected): + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + df = DataFrame([0, 1], columns=["a"], dtype=integer) + db = sql.SQLDatabase(self.conn) + table = sql.SQLTable("test_type", db, frame=df) + + result = str(table.table.c.a.type) + assert result == expected + + @pytest.mark.parametrize("integer", ["uint64", "UInt64"]) + def test_sqlalchemy_integer_overload_mapping(self, integer): + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + df = DataFrame([0, 1], columns=["a"], dtype=integer) + db = sql.SQLDatabase(self.conn) + with pytest.raises( + ValueError, match="Unsigned 64 bit integer datatype is not supported" + ): + sql.SQLTable("test_type", db, frame=df) + def test_database_uri_string(self): # Test read_sql and .to_sql method with a database URI (GH10654) From 67d264071a0aa36b80d1079dbd58af7b1d0bf19d Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Thu, 17 Dec 2020 22:53:30 +0100 Subject: [PATCH 2/5] Add conditional for default 32/64 bit integer size --- pandas/tests/io/test_sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e5b39717d273a..d1a853ca2537f 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -26,6 +26,8 @@ import numpy as np import pytest +from pandas.compat import IS64 + from pandas.core.dtypes.common import is_datetime64_dtype, is_datetime64tz_dtype import pandas as pd @@ -1154,9 +1156,9 @@ def test_sqlalchemy_type_mapping(self): ("Int32", "INTEGER"), ("uint32", "BIGINT"), ("UInt32", "BIGINT"), - (int, "BIGINT"), ("int64", "BIGINT"), ("Int64", "BIGINT"), + (int, "BIGINT" if IS64 else "INTEGER"), ], ) def test_sqlalchemy_integer_mapping(self, integer, expected): From 3db0078b84023f1a22e7b7829df307cb1aba1c48 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 18 Dec 2020 22:18:16 +0100 Subject: [PATCH 3/5] Check precision of int dtype --- pandas/tests/io/test_sql.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d1a853ca2537f..9ee8c878bbbf7 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -26,8 +26,6 @@ import numpy as np import pytest -from pandas.compat import IS64 - from pandas.core.dtypes.common import is_datetime64_dtype, is_datetime64tz_dtype import pandas as pd @@ -1158,7 +1156,7 @@ def test_sqlalchemy_type_mapping(self): ("UInt32", "BIGINT"), ("int64", "BIGINT"), ("Int64", "BIGINT"), - (int, "BIGINT" if IS64 else "INTEGER"), + (int, "BIGINT" if np.dtype(int).name == "int64" else "INTEGER"), ], ) def test_sqlalchemy_integer_mapping(self, integer, expected): From 3352b44b50f6ccc7def0e112de91781f31939e6c Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sun, 20 Dec 2020 23:02:54 +0100 Subject: [PATCH 4/5] Add whatsnew entry --- doc/source/whatsnew/v1.3.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 7671962018144..4ac7fd22e3e87 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -244,6 +244,8 @@ I/O - Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`) - Bug in :func:`to_hdf` raising ``KeyError`` when trying to apply for subclasses of ``DataFrame`` or ``Series`` (:issue:`33748`). +- Bug in :func:`_sqlalchemy_type` that mapped pandas integer types to non-optimal SQLAlchemy integer types (:issue:`35076`) + Period ^^^^^^ From 0a1f2ecc755ef411d36041121d60d552958d70a1 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Mon, 21 Dec 2020 00:06:16 +0100 Subject: [PATCH 5/5] Update whatsnew --- doc/source/whatsnew/v1.3.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 4ac7fd22e3e87..992b40d928ed0 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -41,6 +41,7 @@ Other enhancements - Added :meth:`MultiIndex.dtypes` (:issue:`37062`) - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`) +- Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) .. --------------------------------------------------------------------------- @@ -244,8 +245,6 @@ I/O - Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`) - Bug in :func:`to_hdf` raising ``KeyError`` when trying to apply for subclasses of ``DataFrame`` or ``Series`` (:issue:`33748`). -- Bug in :func:`_sqlalchemy_type` that mapped pandas integer types to non-optimal SQLAlchemy integer types (:issue:`35076`) - Period ^^^^^^