From d0de4c590d5eb0a00901fb2f5e6b833086103194 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Dec 2022 16:10:21 -0800 Subject: [PATCH 1/4] REF: Rename mode.nullable_backend to mode.dtype_backend --- pandas/core/generic.py | 4 ++-- pandas/core/series.py | 2 +- pandas/io/orc.py | 10 +++++----- pandas/io/parquet.py | 6 +++--- pandas/io/parsers/arrow_parser_wrapper.py | 2 +- pandas/io/parsers/base_parser.py | 2 +- pandas/io/parsers/readers.py | 8 ++++---- pandas/tests/frame/methods/test_convert_dtypes.py | 6 +++--- pandas/tests/io/excel/test_readers.py | 2 +- pandas/tests/io/parser/dtypes/test_dtypes_basic.py | 2 +- pandas/tests/io/test_orc.py | 6 +++--- pandas/tests/io/test_parquet.py | 2 +- 12 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a4d99cb0eca42..83de37f5deb8d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6435,9 +6435,9 @@ def convert_dtypes( .. versionadded:: 2.0 The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.nullable_backend", "pandas")`` to use + ``pd.set_option("mode.dtype_backend", "pandas")`` to use numpy-backed nullable dtypes or - ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). Examples diff --git a/pandas/core/series.py b/pandas/core/series.py index b1758b485bf98..ff74c1496191a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5410,7 +5410,7 @@ def _convert_dtypes( input_series = input_series.copy() if convert_string or convert_integer or convert_boolean or convert_floating: - nullable_backend = get_option("mode.nullable_backend") + nullable_backend = get_option("mode.dtype_backend") inferred_dtype = convert_dtypes( input_series._values, convert_string, diff --git a/pandas/io/orc.py b/pandas/io/orc.py index bb8abc902010e..dfa268955bbf3 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -59,16 +59,16 @@ def read_orc( for the resulting DataFrame. The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.nullable_backend", "pandas")`` to use + ``pd.set_option("mode.dtype_backend", "pandas")`` to use numpy-backed nullable dtypes or - ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0.0 .. note - Currently only ``mode.nullable_backend`` set to ``"pyarrow"`` is supported. + Currently only ``mode.dtype_backend`` set to ``"pyarrow"`` is supported. **kwargs Any additional kwargs are passed to pyarrow. @@ -90,10 +90,10 @@ def read_orc( orc_file = orc.ORCFile(handles.handle) pa_table = orc_file.read(columns=columns, **kwargs) if use_nullable_dtypes: - nullable_backend = get_option("mode.nullable_backend") + nullable_backend = get_option("mode.dtype_backend") if nullable_backend != "pyarrow": raise NotImplementedError( - f"mode.nullable_backend set to {nullable_backend} is not implemented." + f"mode.dtype_backend set to {nullable_backend} is not implemented." ) df = DataFrame( { diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 8767596af3e58..927f8b7720a09 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -222,7 +222,7 @@ def read( ) -> DataFrame: kwargs["use_pandas_metadata"] = True - nullable_backend = get_option("mode.nullable_backend") + nullable_backend = get_option("mode.dtype_backend") to_pandas_kwargs = {} if use_nullable_dtypes: import pandas as pd @@ -509,9 +509,9 @@ def read_parquet( .. versionadded:: 1.2.0 The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.nullable_backend", "pandas")`` to use + ``pd.set_option("mode.dtype_backend", "pandas")`` to use numpy-backed nullable dtypes or - ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0.0 diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 3ef53eeca6ee1..420b6212f857a 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -151,7 +151,7 @@ def read(self) -> DataFrame: ) if ( self.kwds["use_nullable_dtypes"] - and get_option("mode.nullable_backend") == "pyarrow" + and get_option("mode.dtype_backend") == "pyarrow" ): frame = DataFrame( { diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index e6f4830846c77..16f9a531a4beb 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -713,7 +713,7 @@ def _infer_types( use_nullable_dtypes: Literal[True] | Literal[False] = ( self.use_nullable_dtypes and no_dtype_specified ) - nullable_backend = get_option("mode.nullable_backend") + nullable_backend = get_option("mode.dtype_backend") result: ArrayLike if try_num_bool and is_object_dtype(values.dtype): diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 0690ebfae727f..9560fc5e6494a 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -399,9 +399,9 @@ implementation, even if no nulls are present. The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.nullable_backend", "pandas")`` to use + ``pd.set_option("mode.dtype_backend", "pandas")`` to use numpy-backed nullable dtypes or - ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0 @@ -561,11 +561,11 @@ def _read( ) elif ( kwds.get("use_nullable_dtypes", False) - and get_option("mode.nullable_backend") == "pyarrow" + and get_option("mode.dtype_backend") == "pyarrow" ): raise NotImplementedError( f"use_nullable_dtypes=True and engine={kwds['engine']} with " - "mode.nullable_backend set to 'pyarrow' is not implemented." + "mode.dtype_backend set to 'pyarrow' is not implemented." ) else: chunksize = validate_integer("chunksize", chunksize, 1) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 01c9a88468655..1b65477f1cc0d 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -56,7 +56,7 @@ def test_pyarrow_nullable_backend(self): "f": pd.Series(pd.timedelta_range("1D", periods=3)), } ) - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): result = df.convert_dtypes() expected = pd.DataFrame( { @@ -93,7 +93,7 @@ def test_pyarrow_nullable_backend(self): def test_pyarrow_nullable_backend_already_pyarrow(self): pytest.importorskip("pyarrow") expected = pd.DataFrame([1, 2, 3], dtype="int64[pyarrow]") - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): result = expected.convert_dtypes() tm.assert_frame_equal(result, expected) @@ -107,7 +107,7 @@ def test_pyarrow_nullable_backend_from_pandas_nullable(self): "d": pd.Series([None, 100.5, 200], dtype="Float64"), } ) - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): result = df.convert_dtypes() expected = pd.DataFrame( { diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 3e879b72a8dcf..9c5a1f8500308 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -561,7 +561,7 @@ def test_use_nullable_dtypes(self, read_ext, nullable_backend): ) with tm.ensure_clean(read_ext) as file_path: df.to_excel(file_path, "test", index=False) - with pd.option_context("mode.nullable_backend", nullable_backend): + with pd.option_context("mode.dtype_backend", nullable_backend): result = pd.read_excel( file_path, sheet_name="test", use_nullable_dtypes=True ) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 95411310bcc35..2b15693adfb35 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -498,7 +498,7 @@ def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request): 1,2.5,True,a,,,,,12-31-2019, 3,4.5,False,b,6,7.5,True,a,12-31-2019, """ - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): if parser.engine != "pyarrow": request.node.add_marker( pytest.mark.xfail( diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 1b811fc18c7f8..87f648bb5acd6 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -309,9 +309,9 @@ def test_orc_use_nullable_dtypes_pandas_backend_not_supported(dirpath): input_file = os.path.join(dirpath, "TestOrcFile.emptyFile.orc") with pytest.raises( NotImplementedError, - match="mode.nullable_backend set to pandas is not implemented.", + match="mode.dtype_backend set to pandas is not implemented.", ): - with pd.option_context("mode.nullable_backend", "pandas"): + with pd.option_context("mode.dtype_backend", "pandas"): read_orc(input_file, use_nullable_dtypes=True) @@ -337,7 +337,7 @@ def test_orc_use_nullable_dtypes_pyarrow_backend(): } ) bytes_data = df.copy().to_orc() - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): result = read_orc(BytesIO(bytes_data), use_nullable_dtypes=True) expected = pd.DataFrame( { diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index a609d0774757e..398e2ccb09df2 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1037,7 +1037,7 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full): pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels")) ) - with pd.option_context("mode.nullable_backend", "pyarrow"): + with pd.option_context("mode.dtype_backend", "pyarrow"): check_round_trip( df, engine=pa, From 0c0eaaf0e39de7bbbc1c03ef5c742dc823f254dc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Dec 2022 17:27:00 -0800 Subject: [PATCH 2/4] Convert more terms --- pandas/core/config_init.py | 6 +++--- pandas/core/dtypes/cast.py | 6 +++--- pandas/core/series.py | 4 ++-- pandas/io/orc.py | 6 +++--- pandas/io/parquet.py | 8 ++++---- pandas/io/parsers/base_parser.py | 4 ++-- pandas/tests/frame/methods/test_convert_dtypes.py | 6 +++--- pandas/tests/io/excel/test_readers.py | 8 ++++---- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 0aca950fe6f3b..148cd232d474c 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -539,7 +539,7 @@ def use_inf_as_na_cb(key) -> None: The default storage for StringDtype. """ -nullable_backend_doc = """ +dtype_backend_doc = """ : string The nullable dtype implementation to return. Available options: 'pandas', 'pyarrow', the default is 'pandas'. @@ -553,9 +553,9 @@ def use_inf_as_na_cb(key) -> None: validator=is_one_of_factory(["python", "pyarrow"]), ) cf.register_option( - "nullable_backend", + "dtype_backend", "pandas", - nullable_backend_doc, + dtype_backend_doc, validator=is_one_of_factory(["pandas", "pyarrow"]), ) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 455257833ec0d..db4d221c6df74 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -961,7 +961,7 @@ def convert_dtypes( convert_boolean: bool = True, convert_floating: bool = True, infer_objects: bool = False, - nullable_backend: Literal["pandas", "pyarrow"] = "pandas", + dtype_backend: Literal["pandas", "pyarrow"] = "pandas", ) -> DtypeObj: """ Convert objects to best possible type, and optionally, @@ -983,7 +983,7 @@ def convert_dtypes( infer_objects : bool, defaults False Whether to also infer objects to float/int if possible. Is only hit if the object array contains pd.NA. - nullable_backend : str, default "pandas" + dtype_backend : str, default "pandas" Nullable dtype implementation to use. * "pandas" returns numpy-backed nullable types @@ -1076,7 +1076,7 @@ def convert_dtypes( else: inferred_dtype = input_array.dtype - if nullable_backend == "pyarrow": + if dtype_backend == "pyarrow": from pandas.core.arrays.arrow.array import to_pyarrow_type from pandas.core.arrays.arrow.dtype import ArrowDtype from pandas.core.arrays.string_ import StringDtype diff --git a/pandas/core/series.py b/pandas/core/series.py index ff74c1496191a..1bdf92e1dcf02 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5410,7 +5410,7 @@ def _convert_dtypes( input_series = input_series.copy() if convert_string or convert_integer or convert_boolean or convert_floating: - nullable_backend = get_option("mode.dtype_backend") + dtype_backend = get_option("mode.dtype_backend") inferred_dtype = convert_dtypes( input_series._values, convert_string, @@ -5418,7 +5418,7 @@ def _convert_dtypes( convert_boolean, convert_floating, infer_objects, - nullable_backend, + dtype_backend, ) result = input_series.astype(inferred_dtype) else: diff --git a/pandas/io/orc.py b/pandas/io/orc.py index dfa268955bbf3..cfa02de9bbcb3 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -90,10 +90,10 @@ def read_orc( orc_file = orc.ORCFile(handles.handle) pa_table = orc_file.read(columns=columns, **kwargs) if use_nullable_dtypes: - nullable_backend = get_option("mode.dtype_backend") - if nullable_backend != "pyarrow": + dtype_backend = get_option("mode.dtype_backend") + if dtype_backend != "pyarrow": raise NotImplementedError( - f"mode.dtype_backend set to {nullable_backend} is not implemented." + f"mode.dtype_backend set to {dtype_backend} is not implemented." ) df = DataFrame( { diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 927f8b7720a09..568747685a36e 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -222,12 +222,12 @@ def read( ) -> DataFrame: kwargs["use_pandas_metadata"] = True - nullable_backend = get_option("mode.dtype_backend") + dtype_backend = get_option("mode.dtype_backend") to_pandas_kwargs = {} if use_nullable_dtypes: import pandas as pd - if nullable_backend == "pandas": + if dtype_backend == "pandas": mapping = { self.api.int8(): pd.Int8Dtype(), self.api.int16(): pd.Int16Dtype(), @@ -257,9 +257,9 @@ def read( pa_table = self.api.parquet.read_table( path_or_handle, columns=columns, **kwargs ) - if nullable_backend == "pandas": + if dtype_backend == "pandas": result = pa_table.to_pandas(**to_pandas_kwargs) - elif nullable_backend == "pyarrow": + elif dtype_backend == "pyarrow": result = DataFrame( { col_name: arrays.ArrowExtensionArray(pa_col) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 16f9a531a4beb..fca44a4565a39 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -713,7 +713,7 @@ def _infer_types( use_nullable_dtypes: Literal[True] | Literal[False] = ( self.use_nullable_dtypes and no_dtype_specified ) - nullable_backend = get_option("mode.dtype_backend") + dtype_backend = get_option("mode.dtype_backend") result: ArrayLike if try_num_bool and is_object_dtype(values.dtype): @@ -771,7 +771,7 @@ def _infer_types( if inferred_type != "datetime": result = StringDtype().construct_array_type()._from_sequence(values) - if use_nullable_dtypes and nullable_backend == "pyarrow": + if use_nullable_dtypes and dtype_backend == "pyarrow": pa = import_optional_dependency("pyarrow") if isinstance(result, np.ndarray): result = ArrowExtensionArray(pa.array(result, from_pandas=True)) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 1b65477f1cc0d..aaccaff0c0c42 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -44,7 +44,7 @@ def test_convert_dtypes_retain_column_names(self): tm.assert_index_equal(result.columns, df.columns) assert result.columns.name == "cols" - def test_pyarrow_nullable_backend(self): + def test_pyarrow_dtype_backend(self): pa = pytest.importorskip("pyarrow") df = pd.DataFrame( { @@ -90,14 +90,14 @@ def test_pyarrow_nullable_backend(self): ) tm.assert_frame_equal(result, expected) - def test_pyarrow_nullable_backend_already_pyarrow(self): + def test_pyarrow_dtype_backend_already_pyarrow(self): pytest.importorskip("pyarrow") expected = pd.DataFrame([1, 2, 3], dtype="int64[pyarrow]") with pd.option_context("mode.dtype_backend", "pyarrow"): result = expected.convert_dtypes() tm.assert_frame_equal(result, expected) - def test_pyarrow_nullable_backend_from_pandas_nullable(self): + def test_pyarrow_dtype_backend_from_pandas_nullable(self): pa = pytest.importorskip("pyarrow") df = pd.DataFrame( { diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 9c5a1f8500308..5b16d3fcd3f2f 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -537,10 +537,10 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): tm.assert_frame_equal(actual, expected) @pytest.mark.parametrize( - "nullable_backend", + "dtype_backend", ["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))], ) - def test_use_nullable_dtypes(self, read_ext, nullable_backend): + def test_use_nullable_dtypes(self, read_ext, dtype_backend): # GH#36712 if read_ext in (".xlsb", ".xls"): pytest.skip(f"No engine for filetype: '{read_ext}'") @@ -561,11 +561,11 @@ def test_use_nullable_dtypes(self, read_ext, nullable_backend): ) with tm.ensure_clean(read_ext) as file_path: df.to_excel(file_path, "test", index=False) - with pd.option_context("mode.dtype_backend", nullable_backend): + with pd.option_context("mode.dtype_backend", dtype_backend): result = pd.read_excel( file_path, sheet_name="test", use_nullable_dtypes=True ) - if nullable_backend == "pyarrow": + if dtype_backend == "pyarrow": import pyarrow as pa from pandas.arrays import ArrowExtensionArray From 30451276d1e6ac200ed6ee2d7c30913281f6b314 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 16 Dec 2022 16:39:34 -0800 Subject: [PATCH 3/4] Update whatsnew --- doc/source/whatsnew/v2.0.0.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index cbad169fe4d56..c494816c7c9c7 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -28,10 +28,10 @@ The available extras, found in the :ref:`installation guide Date: Wed, 21 Dec 2022 10:36:51 -0800 Subject: [PATCH 4/4] Clarified applicability --- pandas/core/config_init.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 148cd232d474c..da9e7de9821b1 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -541,8 +541,9 @@ def use_inf_as_na_cb(key) -> None: dtype_backend_doc = """ : string - The nullable dtype implementation to return. - Available options: 'pandas', 'pyarrow', the default is 'pandas'. + The nullable dtype implementation to return. Only applicable to certain + operations where documented. Available options: 'pandas', 'pyarrow', + the default is 'pandas'. """ with cf.config_prefix("mode"):