REF: Rename mode.nullable_backend to mode.dtype_backend #50291

Merged: 6 commits, Dec 22, 2022
16 changes: 8 additions & 8 deletions doc/source/whatsnew/v2.0.0.rst
@@ -28,10 +28,10 @@ The available extras, found in the :ref:`installation guide<install.dependencies
``[all, performance, computation, timezone, fss, aws, gcp, excel, parquet, feather, hdf5, spss, postgresql, mysql,
sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (:issue:`39164`).

.. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_nullable_backend:
.. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_dtype_backend:

Configuration option, ``mode.nullable_backend``, to return pyarrow-backed dtypes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)

@@ -41,7 +41,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
* :func:`read_sql_query`
* :func:`read_sql_table`

Additionally a new global configuration, ``mode.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
Additionally a new global configuration, ``mode.dtype_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
to select the nullable dtypes implementation.

* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
@@ -50,12 +50,12 @@ to select the nullable dtypes implementation.
* :func:`read_orc`


And the following methods will also utilize the ``mode.nullable_backend`` option.
And the following methods will also utilize the ``mode.dtype_backend`` option.

* :meth:`DataFrame.convert_dtypes`
* :meth:`Series.convert_dtypes`

By default, ``mode.nullable_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
By default, ``mode.dtype_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (:issue:`48957`, :issue:`49997`).

.. ipython:: python
@@ -65,12 +65,12 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
1,2.5,True,a,,,,,
3,4.5,False,b,6,7.5,True,a,
""")
with pd.option_context("mode.nullable_backend", "pandas"):
with pd.option_context("mode.dtype_backend", "pandas"):
df = pd.read_csv(data, use_nullable_dtypes=True)
df.dtypes

data.seek(0)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
df_pyarrow.dtypes

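As a complement to the `read_csv` example above, a minimal sketch of how the renamed option also steers `DataFrame.convert_dtypes`, assuming a development build of pandas that includes this change and an installed pyarrow:

```python
import pandas as pd

# The same option drives DataFrame.convert_dtypes / Series.convert_dtypes.
df = pd.DataFrame({"a": [1, 2, None], "b": ["x", "y", None]})

with pd.option_context("mode.dtype_backend", "pandas"):
    # numpy-backed nullable dtypes, e.g. Int64 / string
    print(df.convert_dtypes().dtypes)

with pd.option_context("mode.dtype_backend", "pyarrow"):
    # pyarrow-backed ArrowDtype, e.g. int64[pyarrow]
    print(df.convert_dtypes().dtypes)
```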
11 changes: 6 additions & 5 deletions pandas/core/config_init.py
@@ -539,10 +539,11 @@ def use_inf_as_na_cb(key) -> None:
The default storage for StringDtype.
"""

nullable_backend_doc = """
dtype_backend_doc = """
: string
The nullable dtype implementation to return.
Available options: 'pandas', 'pyarrow', the default is 'pandas'.
The nullable dtype implementation to return. Only applicable to certain
operations where documented. Available options: 'pandas', 'pyarrow',
the default is 'pandas'.
"""

with cf.config_prefix("mode"):
@@ -553,9 +554,9 @@ def use_inf_as_na_cb(key) -> None:
validator=is_one_of_factory(["python", "pyarrow"]),
)
cf.register_option(
"nullable_backend",
"dtype_backend",
"pandas",
nullable_backend_doc,
dtype_backend_doc,
validator=is_one_of_factory(["pandas", "pyarrow"]),
)

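A short sketch of how the re-registered option is expected to behave on a build containing this change; the `is_one_of_factory` validator restricts values to the two listed backends:

```python
import pandas as pd

# Default value as registered above.
print(pd.get_option("mode.dtype_backend"))  # "pandas"

# Accepted, because "pyarrow" is in the validator's allowed list.
pd.set_option("mode.dtype_backend", "pyarrow")
print(pd.get_option("mode.dtype_backend"))  # "pyarrow"

# Anything else should be rejected by is_one_of_factory
# (a ValueError in current pandas).
try:
    pd.set_option("mode.dtype_backend", "numpy")
except ValueError as err:
    print(err)

pd.reset_option("mode.dtype_backend")  # back to the "pandas" default
```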
6 changes: 3 additions & 3 deletions pandas/core/dtypes/cast.py
@@ -961,7 +961,7 @@ def convert_dtypes(
convert_boolean: bool = True,
convert_floating: bool = True,
infer_objects: bool = False,
nullable_backend: Literal["pandas", "pyarrow"] = "pandas",
dtype_backend: Literal["pandas", "pyarrow"] = "pandas",
) -> DtypeObj:
"""
Convert objects to best possible type, and optionally,
@@ -983,7 +983,7 @@ def convert_dtypes(
infer_objects : bool, defaults False
Whether to also infer objects to float/int if possible. Is only hit if the
object array contains pd.NA.
nullable_backend : str, default "pandas"
dtype_backend : str, default "pandas"
Nullable dtype implementation to use.

* "pandas" returns numpy-backed nullable types
@@ -1076,7 +1076,7 @@ else:
else:
inferred_dtype = input_array.dtype

if nullable_backend == "pyarrow":
if dtype_backend == "pyarrow":
from pandas.core.arrays.arrow.array import to_pyarrow_type
from pandas.core.arrays.arrow.dtype import ArrowDtype
from pandas.core.arrays.string_ import StringDtype
4 changes: 2 additions & 2 deletions pandas/core/generic.py
@@ -6435,9 +6435,9 @@ def convert_dtypes(

.. versionadded:: 2.0
The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

Examples
4 changes: 2 additions & 2 deletions pandas/core/series.py
@@ -5410,15 +5410,15 @@ def _convert_dtypes(
input_series = input_series.copy()

if convert_string or convert_integer or convert_boolean or convert_floating:
nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
inferred_dtype = convert_dtypes(
input_series._values,
convert_string,
convert_integer,
convert_boolean,
convert_floating,
infer_objects,
nullable_backend,
dtype_backend,
)
result = input_series.astype(inferred_dtype)
else:
12 changes: 6 additions & 6 deletions pandas/io/orc.py
@@ -59,16 +59,16 @@ def read_orc(
for the resulting DataFrame.

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0.0

.. note

Currently only ``mode.nullable_backend`` set to ``"pyarrow"`` is supported.
Currently only ``mode.dtype_backend`` set to ``"pyarrow"`` is supported.

**kwargs
Any additional kwargs are passed to pyarrow.
@@ -90,10 +90,10 @@ def read_orc(
orc_file = orc.ORCFile(handles.handle)
pa_table = orc_file.read(columns=columns, **kwargs)
if use_nullable_dtypes:
nullable_backend = get_option("mode.nullable_backend")
if nullable_backend != "pyarrow":
dtype_backend = get_option("mode.dtype_backend")
if dtype_backend != "pyarrow":
raise NotImplementedError(
f"mode.nullable_backend set to {nullable_backend} is not implemented."
f"mode.dtype_backend set to {dtype_backend} is not implemented."
)
df = DataFrame(
{
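To illustrate the `read_orc` behaviour documented above (only the `"pyarrow"` backend is implemented), a hedged sketch that assumes pyarrow is installed and a build containing this change:

```python
import io

import pandas as pd

# Round-trip a small frame through ORC.
buf = io.BytesIO(pd.DataFrame({"a": [1.0, None, 3.0]}).to_orc())

with pd.option_context("mode.dtype_backend", "pyarrow"):
    result = pd.read_orc(buf, use_nullable_dtypes=True)
print(result.dtypes)  # expected: pyarrow-backed dtypes such as double[pyarrow]

buf.seek(0)
with pd.option_context("mode.dtype_backend", "pandas"):
    try:
        pd.read_orc(buf, use_nullable_dtypes=True)
    except NotImplementedError as err:
        print(err)  # "mode.dtype_backend set to pandas is not implemented."
```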
12 changes: 6 additions & 6 deletions pandas/io/parquet.py
@@ -222,12 +222,12 @@ def read(
) -> DataFrame:
kwargs["use_pandas_metadata"] = True

nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
to_pandas_kwargs = {}
if use_nullable_dtypes:
import pandas as pd

if nullable_backend == "pandas":
if dtype_backend == "pandas":
mapping = {
self.api.int8(): pd.Int8Dtype(),
self.api.int16(): pd.Int16Dtype(),
@@ -257,9 +257,9 @@
pa_table = self.api.parquet.read_table(
path_or_handle, columns=columns, **kwargs
)
if nullable_backend == "pandas":
if dtype_backend == "pandas":
result = pa_table.to_pandas(**to_pandas_kwargs)
elif nullable_backend == "pyarrow":
elif dtype_backend == "pyarrow":
result = DataFrame(
{
col_name: arrays.ArrowExtensionArray(pa_col)
@@ -509,9 +509,9 @@ def read_parquet(
.. versionadded:: 1.2.0

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0.0
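Similarly for `read_parquet`, a minimal sketch contrasting the two backends, assuming pyarrow is installed and a build containing this change:

```python
import io

import pandas as pd

buf = io.BytesIO()
pd.DataFrame({"a": [1.5, None, 3.0], "b": ["x", None, "z"]}).to_parquet(buf)

buf.seek(0)
with pd.option_context("mode.dtype_backend", "pandas"):
    # numpy-backed nullable dtypes, e.g. Float64 / string
    print(pd.read_parquet(buf, use_nullable_dtypes=True).dtypes)

buf.seek(0)
with pd.option_context("mode.dtype_backend", "pyarrow"):
    # pyarrow-backed ArrowDtype, e.g. double[pyarrow]
    print(pd.read_parquet(buf, use_nullable_dtypes=True).dtypes)
```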
2 changes: 1 addition & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
@@ -151,7 +151,7 @@ def read(self) -> DataFrame:
)
if (
self.kwds["use_nullable_dtypes"]
and get_option("mode.nullable_backend") == "pyarrow"
and get_option("mode.dtype_backend") == "pyarrow"
):
frame = DataFrame(
{
4 changes: 2 additions & 2 deletions pandas/io/parsers/base_parser.py
@@ -712,7 +712,7 @@ def _infer_types(
use_nullable_dtypes: Literal[True] | Literal[False] = (
self.use_nullable_dtypes and no_dtype_specified
)
nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
result: ArrayLike

if try_num_bool and is_object_dtype(values.dtype):
@@ -770,7 +770,7 @@ def _infer_types(
if inferred_type != "datetime":
result = StringDtype().construct_array_type()._from_sequence(values)

if use_nullable_dtypes and nullable_backend == "pyarrow":
if use_nullable_dtypes and dtype_backend == "pyarrow":
pa = import_optional_dependency("pyarrow")
if isinstance(result, np.ndarray):
result = ArrowExtensionArray(pa.array(result, from_pandas=True))
8 changes: 4 additions & 4 deletions pandas/io/parsers/readers.py
@@ -399,9 +399,9 @@
implementation, even if no nulls are present.

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0
@@ -561,12 +561,12 @@ def _read(
)
elif (
kwds.get("use_nullable_dtypes", False)
and get_option("mode.nullable_backend") == "pyarrow"
and get_option("mode.dtype_backend") == "pyarrow"
and kwds.get("engine") == "c"
):
raise NotImplementedError(
f"use_nullable_dtypes=True and engine={kwds['engine']} with "
"mode.nullable_backend set to 'pyarrow' is not implemented."
"mode.dtype_backend set to 'pyarrow' is not implemented."
)
else:
chunksize = validate_integer("chunksize", chunksize, 1)
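The guard in `_read` above means the `"pyarrow"` backend is rejected for the C engine while the python and pyarrow engines are supported; a hedged sketch, assuming a build containing this change:

```python
import io

import pandas as pd

data = "a,b\n1,x\n,y\n"

with pd.option_context("mode.dtype_backend", "pyarrow"):
    # Supported combination (engine="pyarrow" works as well).
    df = pd.read_csv(io.StringIO(data), use_nullable_dtypes=True, engine="python")
    print(df.dtypes)  # expected: ArrowDtype columns such as int64[pyarrow]

    # Explicitly requesting the C engine is expected to raise, per the guard above.
    try:
        pd.read_csv(io.StringIO(data), use_nullable_dtypes=True, engine="c")
    except NotImplementedError as err:
        print(err)
```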
12 changes: 6 additions & 6 deletions pandas/tests/frame/methods/test_convert_dtypes.py
@@ -44,7 +44,7 @@ def test_convert_dtypes_retain_column_names(self):
tm.assert_index_equal(result.columns, df.columns)
assert result.columns.name == "cols"

def test_pyarrow_nullable_backend(self):
def test_pyarrow_dtype_backend(self):
pa = pytest.importorskip("pyarrow")
df = pd.DataFrame(
{
@@ -56,7 +56,7 @@ def test_pyarrow_nullable_backend(self):
"f": pd.Series(pd.timedelta_range("1D", periods=3)),
}
)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = df.convert_dtypes()
expected = pd.DataFrame(
{
@@ -90,14 +90,14 @@ def test_pyarrow_nullable_backend(self):
)
tm.assert_frame_equal(result, expected)

def test_pyarrow_nullable_backend_already_pyarrow(self):
def test_pyarrow_dtype_backend_already_pyarrow(self):
pytest.importorskip("pyarrow")
expected = pd.DataFrame([1, 2, 3], dtype="int64[pyarrow]")
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = expected.convert_dtypes()
tm.assert_frame_equal(result, expected)

def test_pyarrow_nullable_backend_from_pandas_nullable(self):
def test_pyarrow_dtype_backend_from_pandas_nullable(self):
pa = pytest.importorskip("pyarrow")
df = pd.DataFrame(
{
@@ -107,7 +107,7 @@ def test_pyarrow_nullable_backend_from_pandas_nullable(self):
"d": pd.Series([None, 100.5, 200], dtype="Float64"),
}
)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = df.convert_dtypes()
expected = pd.DataFrame(
{
8 changes: 4 additions & 4 deletions pandas/tests/io/excel/test_readers.py
@@ -537,10 +537,10 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
tm.assert_frame_equal(actual, expected)

@pytest.mark.parametrize(
"nullable_backend",
"dtype_backend",
["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))],
)
def test_use_nullable_dtypes(self, read_ext, nullable_backend):
def test_use_nullable_dtypes(self, read_ext, dtype_backend):
# GH#36712
if read_ext in (".xlsb", ".xls"):
pytest.skip(f"No engine for filetype: '{read_ext}'")
@@ -561,11 +561,11 @@ def test_use_nullable_dtypes(self, read_ext, nullable_backend):
)
with tm.ensure_clean(read_ext) as file_path:
df.to_excel(file_path, "test", index=False)
with pd.option_context("mode.nullable_backend", nullable_backend):
with pd.option_context("mode.dtype_backend", dtype_backend):
result = pd.read_excel(
file_path, sheet_name="test", use_nullable_dtypes=True
)
if nullable_backend == "pyarrow":
if dtype_backend == "pyarrow":
import pyarrow as pa

from pandas.arrays import ArrowExtensionArray
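Outside the test suite, the same `read_excel` behaviour can be sketched with a temporary file, assuming openpyxl and pyarrow are installed plus a build containing this change:

```python
import os
import tempfile

import pandas as pd

df = pd.DataFrame({"a": [1, 2, None], "b": ["x", "y", None]})

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "demo.xlsx")
    df.to_excel(path, sheet_name="test", index=False)
    with pd.option_context("mode.dtype_backend", "pyarrow"):
        result = pd.read_excel(path, sheet_name="test", use_nullable_dtypes=True)

print(result.dtypes)  # expected: pyarrow-backed dtypes such as int64[pyarrow]
```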
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -498,7 +498,7 @@ def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request):
1,2.5,True,a,,,,,12-31-2019,
3,4.5,False,b,6,7.5,True,a,12-31-2019,
"""
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
if engine == "c":
request.node.add_marker(
pytest.mark.xfail(
6 changes: 3 additions & 3 deletions pandas/tests/io/test_orc.py
@@ -309,9 +309,9 @@ def test_orc_use_nullable_dtypes_pandas_backend_not_supported(dirpath):
input_file = os.path.join(dirpath, "TestOrcFile.emptyFile.orc")
with pytest.raises(
NotImplementedError,
match="mode.nullable_backend set to pandas is not implemented.",
match="mode.dtype_backend set to pandas is not implemented.",
):
with pd.option_context("mode.nullable_backend", "pandas"):
with pd.option_context("mode.dtype_backend", "pandas"):
read_orc(input_file, use_nullable_dtypes=True)


@@ -337,7 +337,7 @@ def test_orc_use_nullable_dtypes_pyarrow_backend():
}
)
bytes_data = df.copy().to_orc()
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = read_orc(BytesIO(bytes_data), use_nullable_dtypes=True)
expected = pd.DataFrame(
{
2 changes: 1 addition & 1 deletion pandas/tests/io/test_parquet.py
@@ -1037,7 +1037,7 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels"))
)

with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
check_round_trip(
df,
engine=pa,