diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 3ee660bb85691..1b5b4746336ad 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -573,6 +573,7 @@ relevant columns back to `category` and assign the right categories and categori df2.dtypes df2["cats"] +The same holds for writing to a SQL database with ``to_sql``. Missing Data ------------ diff --git a/doc/source/io.rst b/doc/source/io.rst index e0c6c79380bea..066a9af472c24 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3337,6 +3337,14 @@ With some databases, writing large DataFrames can result in errors due to packet flavors, columns with type ``timedelta64`` will be written as integer values as nanoseconds to the database and a warning will be raised. +.. note:: + + Columns of ``category`` dtype will be converted to the dense representation + as you would get with ``np.asarray(categorical)`` (e.g. for string categories + this gives an array of strings). + Because of this, reading the database table back in does **not** generate + a categorical. + Reading Tables ~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index 0755931bed990..c87c56953f2a2 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -131,7 +131,7 @@ Bug Fixes - Bug in ``Categorical`` not created properly with ``Series.to_frame()`` (:issue:`8626`) - Bug in coercing in astype of a ``Categorical`` of a passed ``pd.Categorical`` (this now raises ``TypeError`` correctly), (:issue:`8626`) - Bug in ``cut``/``qcut`` when using ``Series`` and ``retbins=True`` (:issue:`8589`) - +- Bug in writing ``Categorical`` columns to a SQL database with ``to_sql`` (:issue:`8624`)
diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 09acfcaee976b..0ae82bec38e26 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -670,7 +670,7 @@ def insert_data(self): # datetime.datetime d = b.values.astype('M8[us]').astype(object) else: - d = np.array(b.values, dtype=object) + d = np.array(b.get_values(), dtype=object) # replace NaN with None if b._can_hold_na: diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 2099a8d0de82e..74f1602a0f603 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -678,6 +678,20 @@ def test_chunksize_read(self): tm.assert_frame_equal(res1, res3) + def test_categorical(self): + # GH8624 + # test that categorical gets written correctly as dense column + df = DataFrame( + {'person_id': [1, 2, 3], + 'person_name': ['John P. Doe', 'Jane Dove', 'John P. Doe']}) + df2 = df.copy() + df2['person_name'] = df2['person_name'].astype('category') + + df2.to_sql('test_categorical', self.conn, index=False) + res = sql.read_sql_query('SELECT * FROM test_categorical', self.conn) + + tm.assert_frame_equal(res, df) + class TestSQLApi(_TestSQLApi): """