Skip to content

Commit 0989711

Browse files
committed
ENH: Add use_nullable_dtypes to read_sql_query and table
1 parent 0189674 commit 0989711

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
3838
* :func:`read_csv`
3939
* :func:`read_excel`
4040
* :func:`read_sql`
41+
* :func:`read_sql_query`
42+
* :func:`read_sql_table`
4143

4244
Additionally, a new global configuration, ``io.nullable_backend``, can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
4345
to select the nullable dtypes implementation.

pandas/io/sql.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ def read_sql_table(
222222
parse_dates: list[str] | dict[str, str] | None = ...,
223223
columns: list[str] | None = ...,
224224
chunksize: None = ...,
225+
use_nullable_dtypes: bool = ...,
225226
) -> DataFrame:
226227
...
227228

@@ -236,6 +237,7 @@ def read_sql_table(
236237
parse_dates: list[str] | dict[str, str] | None = ...,
237238
columns: list[str] | None = ...,
238239
chunksize: int = ...,
240+
use_nullable_dtypes: bool = ...,
239241
) -> Iterator[DataFrame]:
240242
...
241243

@@ -249,6 +251,7 @@ def read_sql_table(
249251
parse_dates: list[str] | dict[str, str] | None = None,
250252
columns: list[str] | None = None,
251253
chunksize: int | None = None,
254+
use_nullable_dtypes: bool = False,
252255
) -> DataFrame | Iterator[DataFrame]:
253256
"""
254257
Read SQL database table into a DataFrame.
@@ -285,6 +288,12 @@ def read_sql_table(
285288
chunksize : int, default None
286289
If specified, returns an iterator where `chunksize` is the number of
287290
rows to include in each chunk.
291+
use_nullable_dtypes : bool, default False
292+
Whether to use nullable dtypes as default when reading data. If
293+
set to True, nullable dtypes are used for all dtypes that have a nullable
294+
implementation, even if no nulls are present.
295+
296+
.. versionadded:: 2.0
288297
289298
Returns
290299
-------
@@ -316,6 +325,7 @@ def read_sql_table(
316325
parse_dates=parse_dates,
317326
columns=columns,
318327
chunksize=chunksize,
328+
use_nullable_dtypes=use_nullable_dtypes,
319329
)
320330

321331
if table is not None:
@@ -334,6 +344,7 @@ def read_sql_query(
334344
parse_dates: list[str] | dict[str, str] | None = ...,
335345
chunksize: None = ...,
336346
dtype: DtypeArg | None = ...,
347+
use_nullable_dtypes: bool = ...,
337348
) -> DataFrame:
338349
...
339350

@@ -348,6 +359,7 @@ def read_sql_query(
348359
parse_dates: list[str] | dict[str, str] | None = ...,
349360
chunksize: int = ...,
350361
dtype: DtypeArg | None = ...,
362+
use_nullable_dtypes: bool = ...,
351363
) -> Iterator[DataFrame]:
352364
...
353365

@@ -361,6 +373,7 @@ def read_sql_query(
361373
parse_dates: list[str] | dict[str, str] | None = None,
362374
chunksize: int | None = None,
363375
dtype: DtypeArg | None = None,
376+
use_nullable_dtypes: bool = False,
364377
) -> DataFrame | Iterator[DataFrame]:
365378
"""
366379
Read SQL query into a DataFrame.
@@ -404,6 +417,12 @@ def read_sql_query(
404417
{'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
405418
406419
.. versionadded:: 1.3.0
420+
use_nullable_dtypes : bool, default False
421+
Whether to use nullable dtypes as default when reading data. If
422+
set to True, nullable dtypes are used for all dtypes that have a nullable
423+
implementation, even if no nulls are present.
424+
425+
.. versionadded:: 2.0
407426
408427
Returns
409428
-------
@@ -428,6 +447,7 @@ def read_sql_query(
428447
parse_dates=parse_dates,
429448
chunksize=chunksize,
430449
dtype=dtype,
450+
use_nullable_dtypes=use_nullable_dtypes,
431451
)
432452

433453

pandas/tests/io/test_sql.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2270,22 +2270,23 @@ def test_get_engine_auto_error_message(self):
22702270
pass
22712271
# TODO(GH#36893) fill this in when we add more engines
22722272

2273+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
22732274
@pytest.mark.parametrize("storage", ["pyarrow", "python"])
2274-
def test_read_sql_nullable_dtypes(self, storage):
2275+
def test_read_sql_nullable_dtypes(self, storage, func):
22752276
# GH#50048
22762277
table = "test"
22772278
df = self.nullable_data()
22782279
df.to_sql(table, self.conn, index=False, if_exists="replace")
22792280

22802281
with pd.option_context("mode.string_storage", storage):
2281-
result = pd.read_sql(
2282+
result = getattr(pd, func)(
22822283
f"Select * from {table}", self.conn, use_nullable_dtypes=True
22832284
)
22842285
expected = self.nullable_expected(storage)
22852286
tm.assert_frame_equal(result, expected)
22862287

22872288
with pd.option_context("mode.string_storage", storage):
2288-
iterator = pd.read_sql(
2289+
iterator = getattr(pd, func)(
22892290
f"Select * from {table}",
22902291
self.conn,
22912292
use_nullable_dtypes=True,
@@ -2295,20 +2296,21 @@ def test_read_sql_nullable_dtypes(self, storage):
22952296
for result in iterator:
22962297
tm.assert_frame_equal(result, expected)
22972298

2299+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
22982300
@pytest.mark.parametrize("storage", ["pyarrow", "python"])
2299-
def test_read_sql_nullable_dtypes_table(self, storage):
2301+
def test_read_sql_nullable_dtypes_table(self, storage, func):
23002302
# GH#50048
23012303
table = "test"
23022304
df = self.nullable_data()
23032305
df.to_sql(table, self.conn, index=False, if_exists="replace")
23042306

23052307
with pd.option_context("mode.string_storage", storage):
2306-
result = pd.read_sql(table, self.conn, use_nullable_dtypes=True)
2308+
result = getattr(pd, func)(table, self.conn, use_nullable_dtypes=True)
23072309
expected = self.nullable_expected(storage)
23082310
tm.assert_frame_equal(result, expected)
23092311

23102312
with pd.option_context("mode.string_storage", storage):
2311-
iterator = pd.read_sql(
2313+
iterator = getattr(pd, func)(
23122314
f"Select * from {table}",
23132315
self.conn,
23142316
use_nullable_dtypes=True,
@@ -2444,8 +2446,9 @@ class Test(BaseModel):
24442446
def nullable_expected(self, storage) -> DataFrame:
24452447
return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"})
24462448

2449+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
24472450
@pytest.mark.parametrize("storage", ["pyarrow", "python"])
2448-
def test_read_sql_nullable_dtypes_table(self, storage):
2451+
def test_read_sql_nullable_dtypes_table(self, storage, func):
24492452
# GH#50048 Not supported for sqlite
24502453
pass
24512454

0 commit comments

Comments
 (0)