diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index c58ee3818cbd9..4d9aa1673b289 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -38,6 +38,8 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
 * :func:`read_csv`
 * :func:`read_excel`
 * :func:`read_sql`
+* :func:`read_sql_query`
+* :func:`read_sql_table`

 Additionally a new global configuration, ``mode.nullable_backend`` can now be used
 in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
 to select the nullable dtypes implementation.
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 1304e5d59063f..2b845786b0366 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -224,6 +224,7 @@ def read_sql_table(
     parse_dates: list[str] | dict[str, str] | None = ...,
     columns: list[str] | None = ...,
     chunksize: None = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> DataFrame:
     ...

@@ -238,6 +239,7 @@ def read_sql_table(
     parse_dates: list[str] | dict[str, str] | None = ...,
     columns: list[str] | None = ...,
     chunksize: int = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> Iterator[DataFrame]:
     ...

@@ -251,6 +253,7 @@ def read_sql_table(
     parse_dates: list[str] | dict[str, str] | None = None,
     columns: list[str] | None = None,
     chunksize: int | None = None,
+    use_nullable_dtypes: bool = False,
 ) -> DataFrame | Iterator[DataFrame]:
     """
     Read SQL database table into a DataFrame.
@@ -287,6 +290,12 @@ def read_sql_table(
     chunksize : int, default None
         If specified, returns an iterator where `chunksize` is the number of
         rows to include in each chunk.
+    use_nullable_dtypes : bool = False
+        Whether to use nullable dtypes as default when reading data. If
+        set to True, nullable dtypes are used for all dtypes that have a nullable
+        implementation, even if no nulls are present.
+
+        .. versionadded:: 2.0

     Returns
     -------
@@ -318,6 +327,7 @@ def read_sql_table(
             parse_dates=parse_dates,
             columns=columns,
             chunksize=chunksize,
+            use_nullable_dtypes=use_nullable_dtypes,
         )

     if table is not None:
@@ -336,6 +346,7 @@ def read_sql_query(
     parse_dates: list[str] | dict[str, str] | None = ...,
     chunksize: None = ...,
     dtype: DtypeArg | None = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> DataFrame:
     ...

@@ -350,6 +361,7 @@ def read_sql_query(
     parse_dates: list[str] | dict[str, str] | None = ...,
     chunksize: int = ...,
     dtype: DtypeArg | None = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> Iterator[DataFrame]:
     ...

@@ -363,6 +375,7 @@ def read_sql_query(
     parse_dates: list[str] | dict[str, str] | None = None,
     chunksize: int | None = None,
     dtype: DtypeArg | None = None,
+    use_nullable_dtypes: bool = False,
 ) -> DataFrame | Iterator[DataFrame]:
     """
     Read SQL query into a DataFrame.
@@ -406,6 +419,12 @@ def read_sql_query(
         {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}.

         .. versionadded:: 1.3.0
+    use_nullable_dtypes : bool = False
+        Whether to use nullable dtypes as default when reading data. If
+        set to True, nullable dtypes are used for all dtypes that have a nullable
+        implementation, even if no nulls are present.
+
+        .. versionadded:: 2.0

     Returns
     -------
@@ -430,6 +449,7 @@ def read_sql_query(
         parse_dates=parse_dates,
         chunksize=chunksize,
         dtype=dtype,
+        use_nullable_dtypes=use_nullable_dtypes,
     )

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 0fe5a81c0e685..31ca060e36ad1 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -2276,21 +2276,22 @@ def test_get_engine_auto_error_message(self):
         pass
         # TODO(GH#36893) fill this in when we add more engines

-    def test_read_sql_nullable_dtypes(self, string_storage):
+    @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
+    def test_read_sql_nullable_dtypes(self, string_storage, func):
         # GH#50048
         table = "test"
         df = self.nullable_data()
         df.to_sql(table, self.conn, index=False, if_exists="replace")

         with pd.option_context("mode.string_storage", string_storage):
-            result = pd.read_sql(
+            result = getattr(pd, func)(
                 f"Select * from {table}", self.conn, use_nullable_dtypes=True
             )
         expected = self.nullable_expected(string_storage)
         tm.assert_frame_equal(result, expected)

         with pd.option_context("mode.string_storage", string_storage):
-            iterator = pd.read_sql(
+            iterator = getattr(pd, func)(
                 f"Select * from {table}",
                 self.conn,
                 use_nullable_dtypes=True,
@@ -2300,20 +2301,21 @@ def test_read_sql_nullable_dtypes(self, string_storage):
             for result in iterator:
                 tm.assert_frame_equal(result, expected)

-    def test_read_sql_nullable_dtypes_table(self, string_storage):
+    @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
+    def test_read_sql_nullable_dtypes_table(self, string_storage, func):
         # GH#50048
         table = "test"
         df = self.nullable_data()
         df.to_sql(table, self.conn, index=False, if_exists="replace")

         with pd.option_context("mode.string_storage", string_storage):
-            result = pd.read_sql(table, self.conn, use_nullable_dtypes=True)
+            result = getattr(pd, func)(table, self.conn, use_nullable_dtypes=True)
         expected = self.nullable_expected(string_storage)
         tm.assert_frame_equal(result, expected)

         with pd.option_context("mode.string_storage", string_storage):
-            iterator = pd.read_sql(
-                f"Select * from {table}",
+            iterator = getattr(pd, func)(
+                table,
                 self.conn,
                 use_nullable_dtypes=True,
                 chunksize=3,
@@ -2463,7 +2465,8 @@ class Test(BaseModel):
     def nullable_expected(self, storage) -> DataFrame:
         return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"})

-    def test_read_sql_nullable_dtypes_table(self, string_storage):
+    @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
+    def test_read_sql_nullable_dtypes_table(self, string_storage, func):
         # GH#50048 Not supported for sqlite
         pass
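
Usage note (not part of the diff): a minimal sketch of the expanded keyword, assuming a pandas build that includes this change. The in-memory SQLite database and the `example` table are illustrative only; `read_sql_table` accepts the same keyword but requires an SQLAlchemy connectable.

```python
import sqlite3

import pandas as pd

# Illustrative in-memory database; any DBAPI2 connection works the same way.
conn = sqlite3.connect(":memory:")
pd.DataFrame({"a": [1, 2, 3], "b": ["x", None, "z"]}).to_sql(
    "example", conn, index=False
)

# With use_nullable_dtypes=True, columns come back as nullable dtypes
# (e.g. Int64 rather than int64) even when they contain no nulls.
result = pd.read_sql_query("SELECT * FROM example", conn, use_nullable_dtypes=True)
print(result.dtypes)

conn.close()
```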