Skip to content

CLN: remove unused coerce arg in NDFrame._convert #38151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 16 additions & 29 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1177,45 +1177,32 @@ def soft_convert_objects(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
coerce: bool = False,
copy: bool = True,
):
""" if we have an object dtype, try to coerce dates and/or numbers """
"""
Try to coerce datetime, timedelta, and numeric object-dtype columns
to inferred dtype.

Parameters
----------
values : np.ndarray[object]
datetime : bool, default True
numeric : bool, default True
timedelta : bool, default True
copy : bool, default True

Returns
-------
np.ndarray
"""
validate_bool_kwarg(datetime, "datetime")
validate_bool_kwarg(numeric, "numeric")
validate_bool_kwarg(timedelta, "timedelta")
validate_bool_kwarg(coerce, "coerce")
validate_bool_kwarg(copy, "copy")

conversion_count = sum((datetime, numeric, timedelta))
if conversion_count == 0:
raise ValueError("At least one of datetime, numeric or timedelta must be True.")
elif conversion_count > 1 and coerce:
raise ValueError(
"Only one of 'datetime', 'numeric' or "
"'timedelta' can be True when coerce=True."
)

if not is_object_dtype(values.dtype):
# If not object, do not attempt conversion
values = values.copy() if copy else values
return values

# If 1 flag is coerce, ensure 2 others are False
if coerce:
# Immediate return if coerce
if datetime:
from pandas import to_datetime

return to_datetime(values, errors="coerce").to_numpy()
elif timedelta:
from pandas import to_timedelta

return to_timedelta(values, errors="coerce").to_numpy()
elif numeric:
from pandas import to_numeric

return to_numeric(values, errors="coerce")

# Soft conversions
if datetime:
Expand Down
10 changes: 1 addition & 9 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5997,7 +5997,6 @@ def _convert(
datetime: bool_t = False,
numeric: bool_t = False,
timedelta: bool_t = False,
coerce: bool_t = False,
) -> FrameOrSeries:
"""
Attempt to infer better dtype for object columns
Expand All @@ -6011,9 +6010,6 @@ def _convert(
unconvertible values becoming NaN.
timedelta : bool, default False
If True, convert to timedelta where possible.
coerce : bool, default False
If True, force conversion with unconvertible values converted to
nulls (NaN or NaT).

Returns
-------
Expand All @@ -6022,13 +6018,11 @@ def _convert(
validate_bool_kwarg(datetime, "datetime")
validate_bool_kwarg(numeric, "numeric")
validate_bool_kwarg(timedelta, "timedelta")
validate_bool_kwarg(coerce, "coerce")
return self._constructor(
self._mgr.convert(
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
coerce=coerce,
copy=True,
)
).__finalize__(self)
Expand Down Expand Up @@ -6076,9 +6070,7 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
# python objects will still be converted to
# native numpy numeric types
return self._constructor(
self._mgr.convert(
datetime=True, numeric=False, timedelta=True, coerce=False, copy=True
)
self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True)
).__finalize__(self, method="infer_objects")

@final
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,6 @@ def convert(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
coerce: bool = False,
) -> List["Block"]:
"""
attempt to coerce any object types to better types return a copy
Expand Down Expand Up @@ -2506,12 +2505,12 @@ def convert(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
coerce: bool = False,
) -> List["Block"]:
"""
attempt to coerce any object types to better types return a copy of
attempt to cast any object types to better types return a copy of
the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
"""

# operate column-by-column
def f(mask, val, idx):
shape = val.shape
Expand All @@ -2520,7 +2519,6 @@ def f(mask, val, idx):
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
coerce=coerce,
copy=copy,
)
if isinstance(values, np.ndarray):
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,15 +636,13 @@ def convert(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
coerce: bool = False,
) -> "BlockManager":
return self.apply(
"convert",
copy=copy,
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
coerce=coerce,
)

def replace(self, to_replace, value, inplace: bool, regex: bool) -> "BlockManager":
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def make_dtnat_arr(n, nnat=None):
df = DataFrame(dict(a=s1, b=s2))
df.to_csv(pth, chunksize=chunksize)

recons = self.read_csv(pth)._convert(datetime=True, coerce=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting: we must have already removed all the other internal uses of `_convert` (and if we haven't, we should).

recons = self.read_csv(pth).apply(to_datetime)
tm.assert_frame_equal(df, recons, check_names=False)

@pytest.mark.slow
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@
from pandas.errors import ParserError
import pandas.util._test_decorators as td

from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, read_csv
from pandas import (
DataFrame,
MultiIndex,
Series,
Timestamp,
date_range,
read_csv,
to_datetime,
)
import pandas._testing as tm

from pandas.io.common import file_path_to_url
Expand Down Expand Up @@ -610,7 +618,7 @@ def try_remove_ws(x):
gtnew = ground_truth.applymap(try_remove_ws)
converted = dfnew._convert(datetime=True, numeric=True)
date_cols = ["Closing Date", "Updated Date"]
converted[date_cols] = converted[date_cols]._convert(datetime=True, coerce=True)
converted[date_cols] = converted[date_cols].apply(to_datetime)
tm.assert_frame_equal(converted, gtnew)

@pytest.mark.slow
Expand Down
76 changes: 4 additions & 72 deletions pandas/tests/series/methods/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,23 @@
import numpy as np
import pytest

from pandas import NaT, Series, Timestamp
from pandas import Series, Timestamp
import pandas._testing as tm


class TestConvert:
def test_convert(self):
# GH#10265
# Tests: All to nans, coerce, true
# Test coercion returns correct type
ser = Series(["a", "b", "c"])
results = ser._convert(datetime=True, coerce=True)
expected = Series([NaT] * 3)
tm.assert_series_equal(results, expected)

results = ser._convert(numeric=True, coerce=True)
expected = Series([np.nan] * 3)
tm.assert_series_equal(results, expected)

expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]"))
results = ser._convert(timedelta=True, coerce=True)
tm.assert_series_equal(results, expected)

dt = datetime(2001, 1, 1, 0, 0)
td = dt - datetime(2000, 1, 1, 0, 0)

# Test coercion with mixed types
ser = Series(["a", "3.1415", dt, td])
results = ser._convert(datetime=True, coerce=True)
expected = Series([NaT, NaT, dt, NaT])
tm.assert_series_equal(results, expected)

results = ser._convert(numeric=True, coerce=True)
results = ser._convert(numeric=True)
expected = Series([np.nan, 3.1415, np.nan, np.nan])
tm.assert_series_equal(results, expected)

results = ser._convert(timedelta=True, coerce=True)
expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]"))
tm.assert_series_equal(results, expected)

# Test standard conversion returns original
results = ser._convert(datetime=True)
tm.assert_series_equal(results, ser)
Expand Down Expand Up @@ -116,19 +94,6 @@ def test_convert(self):
datetime(2001, 1, 3, 0, 0),
]
)
s2 = Series(
[
datetime(2001, 1, 1, 0, 0),
datetime(2001, 1, 2, 0, 0),
datetime(2001, 1, 3, 0, 0),
"foo",
1.0,
1,
Timestamp("20010104"),
"20010105",
],
dtype="O",
)

result = ser._convert(datetime=True)
expected = Series(
Expand All @@ -137,35 +102,12 @@ def test_convert(self):
)
tm.assert_series_equal(result, expected)

result = ser._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)

expected = Series(
[
Timestamp("20010101"),
Timestamp("20010102"),
Timestamp("20010103"),
NaT,
NaT,
NaT,
Timestamp("20010104"),
Timestamp("20010105"),
],
dtype="M8[ns]",
)
result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True)
tm.assert_series_equal(result, expected)
result = s2._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)

ser = Series(["foo", "bar", 1, 1.0], dtype="O")
result = ser._convert(datetime=True, coerce=True)
expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
result = ser._convert(datetime=True)
tm.assert_series_equal(result, expected)

# preserve if non-object
ser = Series([1], dtype="float32")
result = ser._convert(datetime=True, coerce=True)
result = ser._convert(datetime=True)
tm.assert_series_equal(result, ser)

# FIXME: don't leave commented-out
Expand All @@ -174,16 +116,6 @@ def test_convert(self):
# result = res._convert(convert_dates=True,convert_numeric=False)
# assert result.dtype == 'M8[ns]'

# dateutil parses some single letters into today's value as a date
expected = Series([NaT])
for x in "abcdefghijklmnopqrstuvwxyz":
ser = Series([x])
result = ser._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)
ser = Series([x.upper()])
result = ser._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)

def test_convert_no_arg_error(self):
ser = Series(["1.0", "2"])
msg = r"At least one of datetime, numeric or timedelta must be True\."
Expand Down