From 407be763f698b9530990dcd8625bb8516a86c5c8 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 2 Dec 2022 12:13:40 -0800 Subject: [PATCH] REF: remove NDFrame._convert --- pandas/core/generic.py | 31 ------- pandas/core/groupby/generic.py | 5 +- pandas/plotting/_matplotlib/core.py | 4 +- pandas/plotting/_matplotlib/hist.py | 2 +- pandas/tests/apply/test_frame_apply.py | 2 +- pandas/tests/frame/methods/test_convert.py | 42 --------- pandas/tests/io/pytables/test_append.py | 1 - pandas/tests/io/pytables/test_errors.py | 2 +- pandas/tests/io/pytables/test_put.py | 2 +- pandas/tests/io/pytables/test_store.py | 4 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/series/methods/test_convert.py | 94 --------------------- 12 files changed, 11 insertions(+), 180 deletions(-) delete mode 100644 pandas/tests/frame/methods/test_convert.py delete mode 100644 pandas/tests/series/methods/test_convert.py diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 038c889e4d5f7..d1e48a3d10a1e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6315,37 +6315,6 @@ def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT: """ return self.copy(deep=True) - @final - def _convert( - self: NDFrameT, - *, - datetime: bool_t = False, - timedelta: bool_t = False, - ) -> NDFrameT: - """ - Attempt to infer better dtype for object columns. - - Parameters - ---------- - datetime : bool, default False - If True, convert to date where possible. - timedelta : bool, default False - If True, convert to timedelta where possible. - - Returns - ------- - converted : same as input object - """ - validate_bool_kwarg(datetime, "datetime") - validate_bool_kwarg(timedelta, "timedelta") - return self._constructor( - self._mgr.convert( - datetime=datetime, - timedelta=timedelta, - copy=True, - ) - ).__finalize__(self) - @final def infer_objects(self: NDFrameT) -> NDFrameT: """ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a80892a145a70..d3e37a40614b3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1681,9 +1681,8 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: if self.axis == 1: result = result.T - # Note: we only need to pass datetime=True in order to get numeric - # values converted - return self._reindex_output(result)._convert(datetime=True) + # Note: we really only care about inferring numeric dtypes here + return self._reindex_output(result).infer_objects() def _iterate_column_groupbys(self, obj: DataFrame | Series): for i, colname in enumerate(obj.columns): diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 324076cd38917..3a634a60e784e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -600,9 +600,9 @@ def _compute_plot_data(self): self.subplots = True data = reconstruct_data_with_by(self.data, by=self.by, cols=self.columns) - # GH16953, _convert is needed as fallback, for ``Series`` + # GH16953, infer_objects is needed as fallback, for ``Series`` # with ``dtype == object`` - data = data._convert(datetime=True, timedelta=True) + data = data.infer_objects() include_type = [np.number, "datetime", "datetimetz", "timedelta"] # GH23719, allow plotting boolean diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 27c69a31f31a2..337628aa3bc2e 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -78,7 +78,7 @@ def _args_adjust(self) -> None: def _calculate_bins(self, data: DataFrame) -> np.ndarray: """Calculate bins given data""" - nd_values = data._convert(datetime=True)._get_numeric_data() + nd_values = data.infer_objects()._get_numeric_data() values = np.ravel(nd_values) values = values[~isna(values)] diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 7bf1621d0acea..e7c2618d388c2 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -461,7 +461,7 @@ def test_apply_convert_objects(): } ) - result = expected.apply(lambda x: x, axis=1)._convert(datetime=True) + result = expected.apply(lambda x: x, axis=1) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_convert.py b/pandas/tests/frame/methods/test_convert.py deleted file mode 100644 index c6c70210d1cc4..0000000000000 --- a/pandas/tests/frame/methods/test_convert.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np -import pytest - -from pandas import DataFrame -import pandas._testing as tm - - -class TestConvert: - def test_convert_objects(self, float_string_frame): - - oops = float_string_frame.T.T - converted = oops._convert(datetime=True) - tm.assert_frame_equal(converted, float_string_frame) - assert converted["A"].dtype == np.float64 - - # force numeric conversion - float_string_frame["H"] = "1." - float_string_frame["I"] = "1" - - # add in some items that will be nan - float_string_frame["J"] = "1." - float_string_frame["K"] = "1" - float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled" - converted = float_string_frame._convert(datetime=True) - tm.assert_frame_equal(converted, float_string_frame) - - # via astype - converted = float_string_frame.copy() - converted["H"] = converted["H"].astype("float64") - converted["I"] = converted["I"].astype("int64") - assert converted["H"].dtype == "float64" - assert converted["I"].dtype == "int64" - - # via astype, but errors - converted = float_string_frame.copy() - with pytest.raises(ValueError, match="invalid literal"): - converted["H"].astype("int32") - - def test_convert_objects_no_conversion(self): - mixed1 = DataFrame({"a": [1, 2, 3], "b": [4.0, 5, 6], "c": ["x", "y", "z"]}) - mixed2 = mixed1._convert(datetime=True) - tm.assert_frame_equal(mixed1, mixed2) diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 4ae31f300cb6f..5633b9f8a71c7 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -592,7 +592,6 @@ def check_col(key, name, size): df_dc.loc[df_dc.index[7:9], "string"] = "bar" df_dc["string2"] = "cool" df_dc["datetime"] = Timestamp("20010102") - df_dc = df_dc._convert(datetime=True) df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan _maybe_remove(store, "df_dc") diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index 7e590df95f952..7629e8ca7dfc2 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -75,7 +75,7 @@ def test_unimplemented_dtypes_table_columns(setup_path): df["obj1"] = "foo" df["obj2"] = "bar" df["datetime1"] = datetime.date(2001, 1, 2) - df = df._consolidate()._convert(datetime=True) + df = df._consolidate() with ensure_clean_store(setup_path) as store: # this fails because we have a date in the object block...... diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 2699d33950412..349fe74cb8e71 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -197,7 +197,7 @@ def test_put_mixed_type(setup_path): df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[df.index[3:6], ["obj1"]] = np.nan - df = df._consolidate()._convert(datetime=True) + df = df._consolidate() with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df") diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 22873b0096817..1263d61b55cd5 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -129,7 +129,7 @@ def test_repr(setup_path): df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[df.index[3:6], ["obj1"]] = np.nan - df = df._consolidate()._convert(datetime=True) + df = df._consolidate() with catch_warnings(record=True): simplefilter("ignore", pd.errors.PerformanceWarning) @@ -444,7 +444,7 @@ def test_table_mixed_dtypes(setup_path): df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[df.index[3:6], ["obj1"]] = np.nan - df = df._consolidate()._convert(datetime=True) + df = df._consolidate() with ensure_clean_store(setup_path) as store: store.append("df1_mixed", df) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index b1fcdd8df01ad..ffc5afcc70bb9 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -627,7 +627,7 @@ def try_remove_ws(x): ] dfnew = df.applymap(try_remove_ws).replace(old, new) gtnew = ground_truth.applymap(try_remove_ws) - converted = dfnew._convert(datetime=True) + converted = dfnew date_cols = ["Closing Date", "Updated Date"] converted[date_cols] = converted[date_cols].apply(to_datetime) tm.assert_frame_equal(converted, gtnew) diff --git a/pandas/tests/series/methods/test_convert.py b/pandas/tests/series/methods/test_convert.py deleted file mode 100644 index f979a28154d4e..0000000000000 --- a/pandas/tests/series/methods/test_convert.py +++ /dev/null @@ -1,94 +0,0 @@ -from datetime import datetime - -import pytest - -from pandas import ( - Series, - Timestamp, -) -import pandas._testing as tm - - -class TestConvert: - def test_convert(self): - # GH#10265 - dt = datetime(2001, 1, 1, 0, 0) - td = dt - datetime(2000, 1, 1, 0, 0) - - # Test coercion with mixed types - ser = Series(["a", "3.1415", dt, td]) - - # Test standard conversion returns original - results = ser._convert(datetime=True) - tm.assert_series_equal(results, ser) - - results = ser._convert(timedelta=True) - tm.assert_series_equal(results, ser) - - def test_convert_numeric_strings_with_other_true_args(self): - # test pass-through and non-conversion when other types selected - ser = Series(["1.0", "2.0", "3.0"]) - results = ser._convert(datetime=True, timedelta=True) - tm.assert_series_equal(results, ser) - - def test_convert_datetime_objects(self): - ser = Series( - [datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O" - ) - results = ser._convert(datetime=True, timedelta=True) - expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)]) - tm.assert_series_equal(results, expected) - results = ser._convert(datetime=False, timedelta=True) - tm.assert_series_equal(results, ser) - - def test_convert_datetime64(self): - # no-op if already dt64 dtype - ser = Series( - [ - datetime(2001, 1, 1, 0, 0), - datetime(2001, 1, 2, 0, 0), - datetime(2001, 1, 3, 0, 0), - ] - ) - - result = ser._convert(datetime=True) - expected = Series( - [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")], - dtype="M8[ns]", - ) - tm.assert_series_equal(result, expected) - - result = ser._convert(datetime=True) - tm.assert_series_equal(result, expected) - - def test_convert_timedeltas(self): - td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0) - ser = Series([td, td], dtype="O") - results = ser._convert(datetime=True, timedelta=True) - expected = Series([td, td]) - tm.assert_series_equal(results, expected) - results = ser._convert(datetime=True, timedelta=False) - tm.assert_series_equal(results, ser) - - def test_convert_preserve_non_object(self): - # preserve if non-object - ser = Series([1], dtype="float32") - result = ser._convert(datetime=True) - tm.assert_series_equal(result, ser) - - def test_convert_no_arg_error(self): - ser = Series(["1.0", "2"]) - msg = r"At least one of datetime or timedelta must be True\." - with pytest.raises(ValueError, match=msg): - ser._convert() - - def test_convert_preserve_bool(self): - ser = Series([1, True, 3, 5], dtype=object) - res = ser._convert(datetime=True) - tm.assert_series_equal(res, ser) - - def test_convert_preserve_all_bool(self): - ser = Series([False, True, False, False], dtype=object) - res = ser._convert(datetime=True) - expected = Series([False, True, False, False], dtype=bool) - tm.assert_series_equal(res, expected)