diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 208bbfa10b9b2..888a6822fe318 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -28,10 +28,10 @@ The available extras, found in the :ref:`installation guide None: The default storage for StringDtype. """ -nullable_backend_doc = """ +dtype_backend_doc = """ : string - The nullable dtype implementation to return. - Available options: 'pandas', 'pyarrow', the default is 'pandas'. + The nullable dtype implementation to return. Only applicable to certain + operations where documented. Available options: 'pandas', 'pyarrow', + the default is 'pandas'. """ with cf.config_prefix("mode"): @@ -553,9 +554,9 @@ def use_inf_as_na_cb(key) -> None: validator=is_one_of_factory(["python", "pyarrow"]), ) cf.register_option( - "nullable_backend", + "dtype_backend", "pandas", - nullable_backend_doc, + dtype_backend_doc, validator=is_one_of_factory(["pandas", "pyarrow"]), ) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 53f4b7dce7f3b..baf008fb97d4e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -961,7 +961,7 @@ def convert_dtypes( convert_boolean: bool = True, convert_floating: bool = True, infer_objects: bool = False, - nullable_backend: Literal["pandas", "pyarrow"] = "pandas", + dtype_backend: Literal["pandas", "pyarrow"] = "pandas", ) -> DtypeObj: """ Convert objects to best possible type, and optionally, @@ -983,7 +983,7 @@ def convert_dtypes( infer_objects : bool, defaults False Whether to also infer objects to float/int if possible. Is only hit if the object array contains pd.NA. - nullable_backend : str, default "pandas" + dtype_backend : str, default "pandas" Nullable dtype implementation to use. * "pandas" returns numpy-backed nullable types @@ -1076,7 +1076,7 @@ def convert_dtypes( else: inferred_dtype = input_array.dtype - if nullable_backend == "pyarrow": + if dtype_backend == "pyarrow": from pandas.core.arrays.arrow.array import to_pyarrow_type from pandas.core.arrays.arrow.dtype import ArrowDtype from pandas.core.arrays.string_ import StringDtype diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 25325d5d473d0..5700e50c45d52 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6435,9 +6435,9 @@ def convert_dtypes( .. versionadded:: 2.0 The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.nullable_backend", "pandas")`` to use + ``pd.set_option("mode.dtype_backend", "pandas")`` to use numpy-backed nullable dtypes or - ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). Examples diff --git a/pandas/core/series.py b/pandas/core/series.py index b1758b485bf98..1bdf92e1dcf02 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5410,7 +5410,7 @@ def _convert_dtypes( input_series = input_series.copy() if convert_string or convert_integer or convert_boolean or convert_floating: - nullable_backend = get_option("mode.nullable_backend") + dtype_backend = get_option("mode.dtype_backend") inferred_dtype = convert_dtypes( input_series._values, convert_string, @@ -5418,7 +5418,7 @@ def _convert_dtypes( convert_boolean, convert_floating, infer_objects, - nullable_backend, + dtype_backend, ) result = input_series.astype(inferred_dtype) else: diff --git a/pandas/io/orc.py b/pandas/io/orc.py index bb8abc902010e..cfa02de9bbcb3 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -59,16 +59,16 @@ def read_orc( for the resulting DataFrame. The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.nullable_backend", "pandas")`` to use + ``pd.set_option("mode.dtype_backend", "pandas")`` to use numpy-backed nullable dtypes or - ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0.0 .. note - Currently only ``mode.nullable_backend`` set to ``"pyarrow"`` is supported. + Currently only ``mode.dtype_backend`` set to ``"pyarrow"`` is supported. **kwargs Any additional kwargs are passed to pyarrow. @@ -90,10 +90,10 @@ def read_orc( orc_file = orc.ORCFile(handles.handle) pa_table = orc_file.read(columns=columns, **kwargs) if use_nullable_dtypes: - nullable_backend = get_option("mode.nullable_backend") - if nullable_backend != "pyarrow": + dtype_backend = get_option("mode.dtype_backend") + if dtype_backend != "pyarrow": raise NotImplementedError( - f"mode.nullable_backend set to {nullable_backend} is not implemented." + f"mode.dtype_backend set to {dtype_backend} is not implemented." ) df = DataFrame( { diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 8767596af3e58..568747685a36e 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -222,12 +222,12 @@ def read( ) -> DataFrame: kwargs["use_pandas_metadata"] = True - nullable_backend = get_option("mode.nullable_backend") + dtype_backend = get_option("mode.dtype_backend") to_pandas_kwargs = {} if use_nullable_dtypes: import pandas as pd - if nullable_backend == "pandas": + if dtype_backend == "pandas": mapping = { self.api.int8(): pd.Int8Dtype(), self.api.int16(): pd.Int16Dtype(), @@ -257,9 +257,9 @@ def read( pa_table = self.api.parquet.read_table( path_or_handle, columns=columns, **kwargs ) - if nullable_backend == "pandas": + if dtype_backend == "pandas": result = pa_table.to_pandas(**to_pandas_kwargs) - elif nullable_backend == "pyarrow": + elif dtype_backend == "pyarrow": result = DataFrame( { col_name: arrays.ArrowExtensionArray(pa_col) @@ -509,9 +509,9 @@ def read_parquet( .. versionadded:: 1.2.0 The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.nullable_backend", "pandas")`` to use + ``pd.set_option("mode.dtype_backend", "pandas")`` to use numpy-backed nullable dtypes or - ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0.0 diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 3ef53eeca6ee1..420b6212f857a 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -151,7 +151,7 @@ def read(self) -> DataFrame: ) if ( self.kwds["use_nullable_dtypes"] - and get_option("mode.nullable_backend") == "pyarrow" + and get_option("mode.dtype_backend") == "pyarrow" ): frame = DataFrame( { diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 2f6ee34092541..9fdb1380e14eb 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -712,7 +712,7 @@ def _infer_types( use_nullable_dtypes: Literal[True] | Literal[False] = ( self.use_nullable_dtypes and no_dtype_specified ) - nullable_backend = get_option("mode.nullable_backend") + dtype_backend = get_option("mode.dtype_backend") result: ArrayLike if try_num_bool and is_object_dtype(values.dtype): @@ -770,7 +770,7 @@ def _infer_types( if inferred_type != "datetime": result = StringDtype().construct_array_type()._from_sequence(values) - if use_nullable_dtypes and nullable_backend == "pyarrow": + if use_nullable_dtypes and dtype_backend == "pyarrow": pa = import_optional_dependency("pyarrow") if isinstance(result, np.ndarray): result = ArrowExtensionArray(pa.array(result, from_pandas=True)) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 46ad6ebb64464..0f1aa8114117c 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -399,9 +399,9 @@ implementation, even if no nulls are present. The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.nullable_backend", "pandas")`` to use + ``pd.set_option("mode.dtype_backend", "pandas")`` to use numpy-backed nullable dtypes or - ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0 @@ -561,12 +561,12 @@ def _read( ) elif ( kwds.get("use_nullable_dtypes", False) - and get_option("mode.nullable_backend") == "pyarrow" + and get_option("mode.dtype_backend") == "pyarrow" and kwds.get("engine") == "c" ): raise NotImplementedError( f"use_nullable_dtypes=True and engine={kwds['engine']} with " - "mode.nullable_backend set to 'pyarrow' is not implemented." + "mode.dtype_backend set to 'pyarrow' is not implemented." ) else: chunksize = validate_integer("chunksize", chunksize, 1) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 01c9a88468655..aaccaff0c0c42 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -44,7 +44,7 @@ def test_convert_dtypes_retain_column_names(self): tm.assert_index_equal(result.columns, df.columns) assert result.columns.name == "cols" - def test_pyarrow_nullable_backend(self): + def test_pyarrow_dtype_backend(self): pa = pytest.importorskip("pyarrow") df = pd.DataFrame( { @@ -56,7 +56,7 @@ def test_pyarrow_nullable_backend(self): "f": pd.Series(pd.timedelta_range("1D", periods=3)), } ) - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): result = df.convert_dtypes() expected = pd.DataFrame( { @@ -90,14 +90,14 @@ def test_pyarrow_nullable_backend(self): ) tm.assert_frame_equal(result, expected) - def test_pyarrow_nullable_backend_already_pyarrow(self): + def test_pyarrow_dtype_backend_already_pyarrow(self): pytest.importorskip("pyarrow") expected = pd.DataFrame([1, 2, 3], dtype="int64[pyarrow]") - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): result = expected.convert_dtypes() tm.assert_frame_equal(result, expected) - def test_pyarrow_nullable_backend_from_pandas_nullable(self): + def test_pyarrow_dtype_backend_from_pandas_nullable(self): pa = pytest.importorskip("pyarrow") df = pd.DataFrame( { @@ -107,7 +107,7 @@ def test_pyarrow_nullable_backend_from_pandas_nullable(self): "d": pd.Series([None, 100.5, 200], dtype="Float64"), } ) - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): result = df.convert_dtypes() expected = pd.DataFrame( { diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index db8b69bc951f9..ad8f1ac7d7d52 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -537,10 +537,10 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): tm.assert_frame_equal(actual, expected) @pytest.mark.parametrize( - "nullable_backend", + "dtype_backend", ["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))], ) - def test_use_nullable_dtypes(self, read_ext, nullable_backend): + def test_use_nullable_dtypes(self, read_ext, dtype_backend): # GH#36712 if read_ext in (".xlsb", ".xls"): pytest.skip(f"No engine for filetype: '{read_ext}'") @@ -561,11 +561,11 @@ def test_use_nullable_dtypes(self, read_ext, nullable_backend): ) with tm.ensure_clean(read_ext) as file_path: df.to_excel(file_path, "test", index=False) - with pd.option_context("mode.nullable_backend", nullable_backend): + with pd.option_context("mode.dtype_backend", dtype_backend): result = pd.read_excel( file_path, sheet_name="test", use_nullable_dtypes=True ) - if nullable_backend == "pyarrow": + if dtype_backend == "pyarrow": import pyarrow as pa from pandas.arrays import ArrowExtensionArray diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 0b5e1ef852208..8fd08122f0834 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -498,7 +498,7 @@ def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request): 1,2.5,True,a,,,,,12-31-2019, 3,4.5,False,b,6,7.5,True,a,12-31-2019, """ - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): if engine == "c": request.node.add_marker( pytest.mark.xfail( diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 1b811fc18c7f8..87f648bb5acd6 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -309,9 +309,9 @@ def test_orc_use_nullable_dtypes_pandas_backend_not_supported(dirpath): input_file = os.path.join(dirpath, "TestOrcFile.emptyFile.orc") with pytest.raises( NotImplementedError, - match="mode.nullable_backend set to pandas is not implemented.", + match="mode.dtype_backend set to pandas is not implemented.", ): - with pd.option_context("mode.nullable_backend", "pandas"): + with pd.option_context("mode.dtype_backend", "pandas"): read_orc(input_file, use_nullable_dtypes=True) @@ -337,7 +337,7 @@ def test_orc_use_nullable_dtypes_pyarrow_backend(): } ) bytes_data = df.copy().to_orc() - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): result = read_orc(BytesIO(bytes_data), use_nullable_dtypes=True) expected = pd.DataFrame( { diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index a609d0774757e..398e2ccb09df2 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1037,7 +1037,7 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full): pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels")) ) - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): check_round_trip( df, engine=pa,