Skip to content

Commit f6c00ff

Browse files
authored
Deprecate datelike isin casting strings to dates to match pandas 2.2 (#15046)
Matches the upstream pandas change pandas-dev/pandas#56427.

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15046
1 parent c0e370b commit f6c00ff

File tree

3 files changed

+33
-19
lines changed

3 files changed

+33
-19
lines changed

python/cudf/cudf/core/tools/datetimes.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,10 +767,20 @@ def _isin_datetimelike(
767767
rhs = None
768768
try:
769769
rhs = cudf.core.column.as_column(values)
770+
was_string = len(rhs) and rhs.dtype.kind == "O"
770771

771772
if rhs.dtype.kind in {"f", "i", "u"}:
772773
return cudf.core.column.full(len(lhs), False, dtype="bool")
773774
rhs = rhs.astype(lhs.dtype)
775+
if was_string:
776+
warnings.warn(
777+
f"The behavior of 'isin' with dtype={lhs.dtype} and "
778+
"castable values (e.g. strings) is deprecated. In a "
779+
"future version, these will not be considered matching "
780+
"by isin. Explicitly cast to the appropriate dtype before "
781+
"calling isin instead.",
782+
FutureWarning,
783+
)
774784
res = lhs._isin_earlystop(rhs)
775785
if res is not None:
776786
return res

python/cudf/cudf/tests/test_index.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2497,19 +2497,12 @@ def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null):
24972497

24982498

24992499
@pytest.mark.parametrize(
2500-
"data",
2500+
"index",
25012501
[
2502-
[],
2503-
pd.Series(
2504-
["this", "is", None, "a", "test"], index=["a", "b", "c", "d", "e"]
2505-
),
2506-
pd.Series([0, 15, 10], index=[0, None, 9]),
2507-
pd.Series(
2508-
range(25),
2509-
index=pd.date_range(
2510-
start="2019-01-01", end="2019-01-02", freq="h"
2511-
),
2512-
),
2502+
pd.Index([]),
2503+
pd.Index(["a", "b", "c", "d", "e"]),
2504+
pd.Index([0, None, 9]),
2505+
pd.date_range("2019-01-01", periods=3),
25132506
],
25142507
)
25152508
@pytest.mark.parametrize(
@@ -2521,12 +2514,19 @@ def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null):
25212514
["2019-01-01 04:00:00", "2019-01-01 06:00:00", "2018-03-02 10:00:00"],
25222515
],
25232516
)
2524-
def test_isin_index(data, values):
2525-
psr = pd.Series(data)
2526-
gsr = cudf.Series.from_pandas(psr)
2517+
def test_isin_index(index, values):
2518+
pidx = index
2519+
gidx = cudf.Index.from_pandas(pidx)
25272520

2528-
got = gsr.index.isin(values)
2529-
expected = psr.index.isin(values)
2521+
is_dt_str = (
2522+
next(iter(values), None) == "2019-01-01 04:00:00"
2523+
and len(pidx)
2524+
and pidx.dtype.kind == "M"
2525+
)
2526+
with expect_warning_if(is_dt_str):
2527+
got = gidx.isin(values)
2528+
with expect_warning_if(PANDAS_GE_220 and is_dt_str):
2529+
expected = pidx.isin(values)
25302530

25312531
assert_eq(got, expected)
25322532

python/cudf/cudf/tests/test_series.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import cudf
1717
from cudf.api.extensions import no_default
18+
from cudf.core._compat import PANDAS_GE_220
1819
from cudf.errors import MixedTypeError
1920
from cudf.testing._utils import (
2021
NUMERIC_TYPES,
@@ -1795,8 +1796,11 @@ def test_isin_datetime(data, values):
17951796
psr = pd.Series(data)
17961797
gsr = cudf.Series.from_pandas(psr)
17971798

1798-
got = gsr.isin(values)
1799-
expected = psr.isin(values)
1799+
is_len_str = isinstance(next(iter(values), None), str) and len(data)
1800+
with expect_warning_if(is_len_str):
1801+
got = gsr.isin(values)
1802+
with expect_warning_if(PANDAS_GE_220 and is_len_str):
1803+
expected = psr.isin(values)
18001804
assert_eq(got, expected)
18011805

18021806

0 commit comments

Comments
 (0)