diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5c635f2d9d3be..bcec5fc27da5e 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -36,6 +36,7 @@ Configuration option, ``mode.nullable_backend``, to return pyarrow-backed dtypes The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`) * :func:`read_csv` +* :func:`read_fwf` * :func:`read_excel` * :func:`read_sql` diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 0690ebfae727f..b88e0d04064db 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1226,6 +1226,7 @@ def read_fwf( colspecs: Sequence[tuple[int, int]] | str | None = "infer", widths: Sequence[int] | None = None, infer_nrows: int = 100, + use_nullable_dtypes: bool = False, **kwds, ) -> DataFrame | TextFileReader: r""" @@ -1257,6 +1258,13 @@ def read_fwf( infer_nrows : int, default 100 The number of rows to consider when letting the parser determine the `colspecs`. + use_nullable_dtypes : bool, default False + Whether or not to use nullable dtypes as default when reading data. If + set to True, nullable dtypes are used for all dtypes that have a nullable + implementation, even if no nulls are present. + + .. versionadded:: 2.0 + **kwds : optional Optional keyword arguments can be passed to ``TextFileReader``. 
@@ -1313,6 +1321,7 @@ def read_fwf( kwds["colspecs"] = colspecs kwds["infer_nrows"] = infer_nrows kwds["engine"] = "python-fwf" + kwds["use_nullable_dtypes"] = use_nullable_dtypes return _read(filepath_or_buffer, kwds) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 61c493a2c368f..0dc8ee81278dd 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -16,11 +16,16 @@ from pandas.errors import EmptyDataError +import pandas as pd from pandas import ( DataFrame, DatetimeIndex, ) import pandas._testing as tm +from pandas.core.arrays import ( + ArrowStringArray, + StringArray, +) from pandas.tests.io.test_compression import _compression_to_extension from pandas.io.parsers import ( @@ -941,3 +946,37 @@ def test_widths_and_usecols(): } ) tm.assert_frame_equal(result, expected) + + +def test_use_nullable_dtypes(string_storage): + # GH#50289 + + data = """a b c d e f g h i +1 2.5 True a +3 4.5 False b True 6 7.5 a""" + with pd.option_context("mode.string_storage", string_storage): + result = read_fwf(StringIO(data), use_nullable_dtypes=True) + + if string_storage == "python": + arr = StringArray(np.array(["a", "b"], dtype=np.object_)) + arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_)) + else: + import pyarrow as pa + + arr = ArrowStringArray(pa.array(["a", "b"])) + arr_na = ArrowStringArray(pa.array([None, "a"])) + + expected = DataFrame( + { + "a": pd.Series([1, 3], dtype="Int64"), + "b": pd.Series([2.5, 4.5], dtype="Float64"), + "c": pd.Series([True, False], dtype="boolean"), + "d": arr, + "e": pd.Series([pd.NA, True], dtype="boolean"), + "f": pd.Series([pd.NA, 6], dtype="Int64"), + "g": pd.Series([pd.NA, 7.5], dtype="Float64"), + "h": arr_na, + "i": pd.Series([pd.NA, pd.NA], dtype="Int64"), + } + ) + tm.assert_frame_equal(result, expected)