diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c3d896166fabe..c7573ee860744 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -44,6 +44,7 @@ Other enhancements - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`) - Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`) - :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`) +- Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0ad9140f2a757..d4aab05b22adf 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1124,6 +1124,7 @@ def _sqlalchemy_type(self, col): DateTime, Float, Integer, + SmallInteger, Text, Time, ) @@ -1154,8 +1155,13 @@ def _sqlalchemy_type(self, col): else: return Float(precision=53) elif col_type == "integer": - if col.dtype == "int32": + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + if col.dtype.name.lower() in ("int8", "uint8", "int16"): + return SmallInteger + elif col.dtype.name.lower() in ("uint16", "int32"): return Integer + elif col.dtype.name.lower() == "uint64": + raise ValueError("Unsigned 64 bit integer datatype is not supported") else: return BigInteger elif col_type == "boolean": diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index fdd42ec0cc5ab..df0815fc52bba 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1160,6 +1160,45 @@ def test_sqlalchemy_type_mapping(self): # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones assert isinstance(table.table.c["time"].type, sqltypes.TIMESTAMP) + @pytest.mark.parametrize( + "integer, expected", + [ + ("int8", "SMALLINT"), + ("Int8", "SMALLINT"), + ("uint8", "SMALLINT"), + ("UInt8", "SMALLINT"), + ("int16", "SMALLINT"), + ("Int16", "SMALLINT"), + ("uint16", "INTEGER"), + ("UInt16", "INTEGER"), + ("int32", "INTEGER"), + ("Int32", "INTEGER"), + ("uint32", "BIGINT"), + ("UInt32", "BIGINT"), + ("int64", "BIGINT"), + ("Int64", "BIGINT"), + (int, "BIGINT" if np.dtype(int).name == "int64" else "INTEGER"), + ], + ) + def test_sqlalchemy_integer_mapping(self, integer, expected): + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + df = DataFrame([0, 1], columns=["a"], dtype=integer) + db = sql.SQLDatabase(self.conn) + table = sql.SQLTable("test_type", db, frame=df) + + result = str(table.table.c.a.type) + assert result == expected + + @pytest.mark.parametrize("integer", ["uint64", "UInt64"]) + def test_sqlalchemy_integer_overload_mapping(self, integer): + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + df = DataFrame([0, 1], columns=["a"], dtype=integer) + db = sql.SQLDatabase(self.conn) + with pytest.raises( + ValueError, match="Unsigned 64 bit integer datatype is not supported" + ): + sql.SQLTable("test_type", db, frame=df) + def test_database_uri_string(self): # Test read_sql and .to_sql method with a database URI (GH10654)