Skip to content

ENH: Add use_nullable_dtypes for read_fwf #50289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Configuration option, ``mode.nullable_backend``, to return pyarrow-backed dtypes
The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)

* :func:`read_csv`
* :func:`read_fwf`
* :func:`read_excel`
* :func:`read_sql`

Expand Down
9 changes: 9 additions & 0 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1226,6 +1226,7 @@ def read_fwf(
colspecs: Sequence[tuple[int, int]] | str | None = "infer",
widths: Sequence[int] | None = None,
infer_nrows: int = 100,
use_nullable_dtypes: bool = False,
**kwds,
) -> DataFrame | TextFileReader:
r"""
Expand Down Expand Up @@ -1257,6 +1258,13 @@ def read_fwf(
infer_nrows : int, default 100
The number of rows to consider when letting the parser determine the
`colspecs`.
use_nullable_dtypes : bool, default False
Whether or not to use nullable dtypes as default when reading data. If
set to True, nullable dtypes are used for all dtypes that have a nullable
implementation, even if no nulls are present.

.. versionadded:: 2.0

**kwds : optional
Optional keyword arguments can be passed to ``TextFileReader``.

Expand Down Expand Up @@ -1313,6 +1321,7 @@ def read_fwf(
kwds["colspecs"] = colspecs
kwds["infer_nrows"] = infer_nrows
kwds["engine"] = "python-fwf"
kwds["use_nullable_dtypes"] = use_nullable_dtypes
return _read(filepath_or_buffer, kwds)


Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/io/parser/test_read_fwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,16 @@

from pandas.errors import EmptyDataError

import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
)
import pandas._testing as tm
from pandas.core.arrays import (
ArrowStringArray,
StringArray,
)
from pandas.tests.io.test_compression import _compression_to_extension

from pandas.io.parsers import (
Expand Down Expand Up @@ -941,3 +946,37 @@ def test_widths_and_usecols():
}
)
tm.assert_frame_equal(result, expected)


def test_use_nullable_dtypes(string_storage):
    """Check that ``read_fwf`` honours ``use_nullable_dtypes=True``.

    The fixture has nine fixed-width columns mixing integers, floats,
    booleans and strings. Some columns are fully populated, some have a
    missing first row, and the last one is entirely empty. The parsed frame
    is compared against a frame built from the nullable extension dtypes.

    Parameters
    ----------
    string_storage : str
        Value applied to the ``mode.string_storage`` option while reading.
        ``"python"`` selects ``StringArray`` expectations; any other value
        selects ``ArrowStringArray`` expectations and imports pyarrow.
        Presumably supplied by a pytest fixture -- confirm in conftest.
    """
    # GH#50289

    # Column boundaries in a fixed-width file are inferred from character
    # positions, so every value must sit directly under its header letter.
    # The rows are spelled out as separate literals to keep that alignment
    # visible and safe from whitespace normalisation.
    #   offsets: a=0, b=3, c=8, d=15, e=18, f=24, g=27, h=32, i=35
    data = (
        "a  b    c      d  e     f  g    h  i\n"
        "1  2.5  True   a\n"
        "3  4.5  False  b  True  6  7.5  a"
    )
    with pd.option_context("mode.string_storage", string_storage):
        result = read_fwf(StringIO(data), use_nullable_dtypes=True)

    if string_storage == "python":
        arr = StringArray(np.array(["a", "b"], dtype=np.object_))
        arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_))
    else:
        # Imported lazily so the "python" storage case does not need pyarrow.
        import pyarrow as pa

        arr = ArrowStringArray(pa.array(["a", "b"]))
        arr_na = ArrowStringArray(pa.array([None, "a"]))

    expected = DataFrame(
        {
            # Fully populated columns.
            "a": pd.Series([1, 3], dtype="Int64"),
            "b": pd.Series([2.5, 4.5], dtype="Float64"),
            "c": pd.Series([True, False], dtype="boolean"),
            "d": arr,
            # Columns whose first row is missing in the input.
            "e": pd.Series([pd.NA, True], dtype="boolean"),
            "f": pd.Series([pd.NA, 6], dtype="Int64"),
            "g": pd.Series([pd.NA, 7.5], dtype="Float64"),
            "h": arr_na,
            # Column with no values at all; expected as Int64.
            "i": pd.Series([pd.NA, pd.NA], dtype="Int64"),
        }
    )
    tm.assert_frame_equal(result, expected)