From 0989711722aef4a871b19720f713a6e401c0e39d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 14 Dec 2022 00:08:01 +0100 Subject: [PATCH 1/2] ENH: Add use_nullable_dtypes to read_sql_query and table --- doc/source/whatsnew/v2.0.0.rst | 2 ++ pandas/io/sql.py | 20 ++++++++++++++++++++ pandas/tests/io/test_sql.py | 17 ++++++++++------- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 75ca21e3e9f72..f4853d63da8e6 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -38,6 +38,8 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following * :func:`read_csv` * :func:`read_excel` * :func:`read_sql` +* :func:`read_sql_query` +* :func:`read_sql_table` Additionally a new global configuration, ``io.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions to select the nullable dtypes implementation. diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 4c1dca180c6e9..1532e26352924 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -222,6 +222,7 @@ def read_sql_table( parse_dates: list[str] | dict[str, str] | None = ..., columns: list[str] | None = ..., chunksize: None = ..., + use_nullable_dtypes: bool = ..., ) -> DataFrame: ... @@ -236,6 +237,7 @@ def read_sql_table( parse_dates: list[str] | dict[str, str] | None = ..., columns: list[str] | None = ..., chunksize: int = ..., + use_nullable_dtypes: bool = ..., ) -> Iterator[DataFrame]: ... @@ -249,6 +251,7 @@ def read_sql_table( parse_dates: list[str] | dict[str, str] | None = None, columns: list[str] | None = None, chunksize: int | None = None, + use_nullable_dtypes: bool = False, ) -> DataFrame | Iterator[DataFrame]: """ Read SQL database table into a DataFrame. 
@@ -285,6 +288,12 @@ def read_sql_table( chunksize : int, default None If specified, returns an iterator where `chunksize` is the number of rows to include in each chunk. + use_nullable_dtypes : bool, default False + Whether to use nullable dtypes as default when reading data. If + set to True, nullable dtypes are used for all dtypes that have a nullable + implementation, even if no nulls are present. + + .. versionadded:: 2.0 Returns ------- @@ -316,6 +325,7 @@ def read_sql_table( parse_dates=parse_dates, columns=columns, chunksize=chunksize, + use_nullable_dtypes=use_nullable_dtypes, ) if table is not None: @@ -334,6 +344,7 @@ def read_sql_query( parse_dates: list[str] | dict[str, str] | None = ..., chunksize: None = ..., dtype: DtypeArg | None = ..., + use_nullable_dtypes: bool = ..., ) -> DataFrame: ... @@ -348,6 +359,7 @@ def read_sql_query( parse_dates: list[str] | dict[str, str] | None = ..., chunksize: int = ..., dtype: DtypeArg | None = ..., + use_nullable_dtypes: bool = ..., ) -> Iterator[DataFrame]: ... @@ -361,6 +373,7 @@ def read_sql_query( parse_dates: list[str] | dict[str, str] | None = None, chunksize: int | None = None, dtype: DtypeArg | None = None, + use_nullable_dtypes: bool = False, ) -> DataFrame | Iterator[DataFrame]: """ Read SQL query into a DataFrame. @@ -404,6 +417,12 @@ def read_sql_query( {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}. .. versionadded:: 1.3.0 + use_nullable_dtypes : bool, default False + Whether to use nullable dtypes as default when reading data. If + set to True, nullable dtypes are used for all dtypes that have a nullable + implementation, even if no nulls are present. + + .. 
versionadded:: 2.0 Returns ------- @@ -428,6 +447,7 @@ def read_sql_query( parse_dates=parse_dates, chunksize=chunksize, dtype=dtype, + use_nullable_dtypes=use_nullable_dtypes, ) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 6764ff27674ab..8396ec6292e50 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2270,22 +2270,23 @@ def test_get_engine_auto_error_message(self): pass # TODO(GH#36893) fill this in when we add more engines + @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"]) @pytest.mark.parametrize("storage", ["pyarrow", "python"]) - def test_read_sql_nullable_dtypes(self, storage): + def test_read_sql_nullable_dtypes(self, storage, func): # GH#50048 table = "test" df = self.nullable_data() df.to_sql(table, self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", storage): - result = pd.read_sql( + result = getattr(pd, func)( f"Select * from {table}", self.conn, use_nullable_dtypes=True ) expected = self.nullable_expected(storage) tm.assert_frame_equal(result, expected) with pd.option_context("mode.string_storage", storage): - iterator = pd.read_sql( + iterator = getattr(pd, func)( f"Select * from {table}", self.conn, use_nullable_dtypes=True, @@ -2295,20 +2296,21 @@ def test_read_sql_nullable_dtypes(self, storage): for result in iterator: tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"]) @pytest.mark.parametrize("storage", ["pyarrow", "python"]) - def test_read_sql_nullable_dtypes_table(self, storage): + def test_read_sql_nullable_dtypes_table(self, storage, func): # GH#50048 table = "test" df = self.nullable_data() df.to_sql(table, self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", storage): - result = pd.read_sql(table, self.conn, use_nullable_dtypes=True) + result = getattr(pd, func)(table, self.conn, use_nullable_dtypes=True) expected = 
self.nullable_expected(storage) tm.assert_frame_equal(result, expected) with pd.option_context("mode.string_storage", storage): - iterator = pd.read_sql( + iterator = getattr(pd, func)( f"Select * from {table}", self.conn, use_nullable_dtypes=True, @@ -2444,8 +2446,9 @@ class Test(BaseModel): def nullable_expected(self, storage) -> DataFrame: return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"}) + @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"]) @pytest.mark.parametrize("storage", ["pyarrow", "python"]) - def test_read_sql_nullable_dtypes_table(self, storage): + def test_read_sql_nullable_dtypes_table(self, storage, func): # GH#50048 Not supported for sqlite pass From 7d24b2acee7bf5a713c53b1855bb38a3802b2dec Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 15 Dec 2022 13:57:18 +0100 Subject: [PATCH 2/2] Fix test --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 8396ec6292e50..31bd60711eb00 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2311,7 +2311,7 @@ def test_read_sql_nullable_dtypes_table(self, storage, func): with pd.option_context("mode.string_storage", storage): iterator = getattr(pd, func)( - f"Select * from {table}", + table, self.conn, use_nullable_dtypes=True, chunksize=3,