diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py index 01a397938db52..cbe4f950494be 100644 --- a/pandas/tests/strings/test_split_partition.py +++ b/pandas/tests/strings/test_split_partition.py @@ -14,21 +14,28 @@ ) -def test_split(any_string_dtype): +@pytest.mark.parametrize("method", ["split", "rsplit"]) +def test_split(any_string_dtype, method): values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) - result = values.str.split("_") + result = getattr(values.str, method)("_") exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) tm.assert_series_equal(result, exp) + +@pytest.mark.parametrize("method", ["split", "rsplit"]) +def test_split_more_than_one_char(any_string_dtype, method): # more than one char values = Series(["a__b__c", "c__d__e", np.nan, "f__g__h"], dtype=any_string_dtype) - result = values.str.split("__") + result = getattr(values.str, method)("__") + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) tm.assert_series_equal(result, exp) - result = values.str.split("__", expand=False) + result = getattr(values.str, method)("__", expand=False) tm.assert_series_equal(result, exp) + +def test_split_more_regex_split(any_string_dtype): # regex split values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"], dtype=any_string_dtype) result = values.str.split("[,_]") @@ -44,6 +51,8 @@ def test_split_regex(any_string_dtype): exp = Series([["xxxjpgzzz", ""]]) tm.assert_series_equal(result, exp) + +def test_split_regex_explicit(any_string_dtype): # explicit regex = True split with compiled regex regex_pat = re.compile(r".jpg") values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype) @@ -74,9 +83,11 @@ def test_split_regex(any_string_dtype): values.str.split(regex_pat, regex=False) -def test_split_object_mixed(): +@pytest.mark.parametrize("expand", [None, False]) +@pytest.mark.parametrize("method", ["split", "rsplit"]) +def test_split_object_mixed(expand, method): mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0]) - result = mixed.str.split("_") + result = getattr(mixed.str, method)("_", expand=expand) exp = Series( [ ["a", "b", "c"], @@ -92,43 +103,26 @@ def test_split_object_mixed(): assert isinstance(result, Series) tm.assert_almost_equal(result, exp) - result = mixed.str.split("_", expand=False) - assert isinstance(result, Series) - tm.assert_almost_equal(result, exp) - @pytest.mark.parametrize("method", ["split", "rsplit"]) -def test_split_n(any_string_dtype, method): +@pytest.mark.parametrize("n", [None, 0]) +def test_split_n(any_string_dtype, method, n): s = Series(["a b", pd.NA, "b c"], dtype=any_string_dtype) expected = Series([["a", "b"], pd.NA, ["b", "c"]]) - result = getattr(s.str, method)(" ", n=None) - tm.assert_series_equal(result, expected) - - result = getattr(s.str, method)(" ", n=0) + result = getattr(s.str, method)(" ", n=n) tm.assert_series_equal(result, expected) def test_rsplit(any_string_dtype): - values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) - result = values.str.rsplit("_") - exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) - tm.assert_series_equal(result, exp) - - # more than one char - values = Series(["a__b__c", "c__d__e", np.nan, "f__g__h"], dtype=any_string_dtype) - result = values.str.rsplit("__") - tm.assert_series_equal(result, exp) - - result = values.str.rsplit("__", expand=False) - tm.assert_series_equal(result, exp) - # regex split is not supported by rsplit values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"], dtype=any_string_dtype) result = values.str.rsplit("[,_]") exp = Series([["a,b_c"], ["c_d,e"], np.nan, ["f,g,h"]]) tm.assert_series_equal(result, exp) + +def test_rsplit_max_number(any_string_dtype): # setting max number of splits, make sure it's from reverse values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) result = values.str.rsplit("_", n=1) @@ -136,30 +130,6 @@ def test_rsplit(any_string_dtype): tm.assert_series_equal(result, exp) -def test_rsplit_object_mixed(): - # mixed - mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0]) - result = mixed.str.rsplit("_") - exp = Series( - [ - ["a", "b", "c"], - np.nan, - ["d", "e", "f"], - np.nan, - np.nan, - np.nan, - np.nan, - np.nan, - ] - ) - assert isinstance(result, Series) - tm.assert_almost_equal(result, exp) - - result = mixed.str.rsplit("_", expand=False) - assert isinstance(result, Series) - tm.assert_almost_equal(result, exp) - - def test_split_blank_string(any_string_dtype): # expand blank split GH 20067 values = Series([""], name="test", dtype=any_string_dtype) @@ -167,6 +137,8 @@ def test_split_blank_string(any_string_dtype): exp = DataFrame([[]], dtype=any_string_dtype) # NOTE: this is NOT an empty df tm.assert_frame_equal(result, exp) + +def test_split_blank_string_with_non_empty(any_string_dtype): values = Series(["a b c", "a b", "", " "], name="test", dtype=any_string_dtype) result = values.str.split(expand=True) exp = DataFrame( @@ -181,14 +153,13 @@ def test_split_blank_string(any_string_dtype): tm.assert_frame_equal(result, exp) -def test_split_noargs(any_string_dtype): +@pytest.mark.parametrize("method", ["split", "rsplit"]) +def test_split_noargs(any_string_dtype, method): # #1859 s = Series(["Wes McKinney", "Travis Oliphant"], dtype=any_string_dtype) - result = s.str.split() + result = getattr(s.str, method)() expected = ["Travis", "Oliphant"] assert result[1] == expected - result = s.str.rsplit() - assert result[1] == expected @pytest.mark.parametrize( @@ -199,17 +170,15 @@ def test_split_noargs(any_string_dtype): (["bd_asdf_jfg", "kjasdflqw_asdfnfk"], "_"), ], ) -def test_split_maxsplit(data, pat, any_string_dtype): +@pytest.mark.parametrize("n", [-1, 0]) +def test_split_maxsplit(data, pat, any_string_dtype, n): # re.split 0, str.split -1 s = Series(data, dtype=any_string_dtype) - result = s.str.split(pat=pat, n=-1) + result = s.str.split(pat=pat, n=n) xp = s.str.split(pat=pat) tm.assert_series_equal(result, xp) - result = s.str.split(pat=pat, n=0) - tm.assert_series_equal(result, xp) - @pytest.mark.parametrize( "data, pat, expected", @@ -232,12 +201,14 @@ def test_split_no_pat_with_nonzero_n(data, pat, expected, any_string_dtype): tm.assert_series_equal(expected, result, check_index_type=False) -def test_split_to_dataframe(any_string_dtype): +def test_split_to_dataframe_no_splits(any_string_dtype): s = Series(["nosplit", "alsonosplit"], dtype=any_string_dtype) result = s.str.split("_", expand=True) exp = DataFrame({0: Series(["nosplit", "alsonosplit"], dtype=any_string_dtype)}) tm.assert_frame_equal(result, exp) + +def test_split_to_dataframe(any_string_dtype): s = Series(["some_equal_splits", "with_no_nans"], dtype=any_string_dtype) result = s.str.split("_", expand=True) exp = DataFrame( @@ -246,6 +217,8 @@ def test_split_to_dataframe(any_string_dtype): ) tm.assert_frame_equal(result, exp) + +def test_split_to_dataframe_unequal_splits(any_string_dtype): s = Series( ["some_unequal_splits", "one_of_these_things_is_not"], dtype=any_string_dtype ) @@ -263,6 +236,8 @@ def test_split_to_dataframe(any_string_dtype): ) tm.assert_frame_equal(result, exp) + +def test_split_to_dataframe_with_index(any_string_dtype): s = Series( ["some_splits", "with_index"], index=["preserve", "me"], dtype=any_string_dtype ) @@ -278,7 +253,7 @@ def test_split_to_dataframe(any_string_dtype): s.str.split("_", expand="not_a_boolean") -def test_split_to_multiindex_expand(): +def test_split_to_multiindex_expand_no_splits(): # https://github.com/pandas-dev/pandas/issues/23677 idx = Index(["nosplit", "alsonosplit", np.nan]) @@ -287,6 +262,8 @@ def test_split_to_multiindex_expand(): tm.assert_index_equal(result, exp) assert result.nlevels == 1 + +def test_split_to_multiindex_expand(): idx = Index(["some_equal_splits", "with_no_nans", np.nan, None]) result = idx.str.split("_", expand=True) exp = MultiIndex.from_tuples( @@ -300,6 +277,8 @@ def test_split_to_multiindex_expand(): tm.assert_index_equal(result, exp) assert result.nlevels == 3 + +def test_split_to_multiindex_expand_unequal_splits(): idx = Index(["some_unequal_splits", "one_of_these_things_is_not", np.nan, None]) result = idx.str.split("_", expand=True) exp = MultiIndex.from_tuples( @@ -317,12 +296,14 @@ def test_split_to_multiindex_expand(): idx.str.split("_", expand="not_a_boolean") -def test_rsplit_to_dataframe_expand(any_string_dtype): +def test_rsplit_to_dataframe_expand_no_splits(any_string_dtype): s = Series(["nosplit", "alsonosplit"], dtype=any_string_dtype) result = s.str.rsplit("_", expand=True) exp = DataFrame({0: Series(["nosplit", "alsonosplit"])}, dtype=any_string_dtype) tm.assert_frame_equal(result, exp) + +def test_rsplit_to_dataframe_expand(any_string_dtype): s = Series(["some_equal_splits", "with_no_nans"], dtype=any_string_dtype) result = s.str.rsplit("_", expand=True) exp = DataFrame( @@ -344,6 +325,8 @@ def test_rsplit_to_dataframe_expand(any_string_dtype): ) tm.assert_frame_equal(result, exp) + +def test_rsplit_to_dataframe_expand_with_index(any_string_dtype): s = Series( ["some_splits", "with_index"], index=["preserve", "me"], dtype=any_string_dtype ) @@ -356,19 +339,23 @@ def test_rsplit_to_dataframe_expand(any_string_dtype): tm.assert_frame_equal(result, exp) -def test_rsplit_to_multiindex_expand(): +def test_rsplit_to_multiindex_expand_no_split(): idx = Index(["nosplit", "alsonosplit"]) result = idx.str.rsplit("_", expand=True) exp = idx tm.assert_index_equal(result, exp) assert result.nlevels == 1 + +def test_rsplit_to_multiindex_expand(): idx = Index(["some_equal_splits", "with_no_nans"]) result = idx.str.rsplit("_", expand=True) exp = MultiIndex.from_tuples([("some", "equal", "splits"), ("with", "no", "nans")]) tm.assert_index_equal(result, exp) assert result.nlevels == 3 + +def test_rsplit_to_multiindex_expand_n(): idx = Index(["some_equal_splits", "with_no_nans"]) result = idx.str.rsplit("_", expand=True, n=1) exp = MultiIndex.from_tuples([("some_equal", "splits"), ("with_no", "nans")]) @@ -394,7 +381,7 @@ def test_split_nan_expand(any_string_dtype): assert all(x is pd.NA for x in result.iloc[1]) -def test_split_with_name(any_string_dtype): +def test_split_with_name_series(any_string_dtype): # GH 12617 # should preserve name @@ -407,6 +394,9 @@ def test_split_with_name(any_string_dtype): exp = DataFrame([["a", "b"], ["c", "d"]], dtype=any_string_dtype) tm.assert_frame_equal(res, exp) + +def test_split_with_name_index(): + # GH 12617 idx = Index(["a,b", "c,d"], name="xxx") res = idx.str.split(",") exp = Index([["a", "b"], ["c", "d"]], name="xxx") @@ -419,191 +409,210 @@ def test_split_with_name(any_string_dtype): tm.assert_index_equal(res, exp) -def test_partition_series(any_string_dtype): +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + [ + ("a", "__", "b__c"), + ("c", "__", "d__e"), + np.nan, + ("f", "__", "g__h"), + None, + ], + ], + [ + "rpartition", + [ + ("a__b", "__", "c"), + ("c__d", "__", "e"), + np.nan, + ("f__g", "__", "h"), + None, + ], + ], + ], +) +def test_partition_series_more_than_one_char(method, exp): # https://github.com/pandas-dev/pandas/issues/23558 - - s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None], dtype=any_string_dtype) - - result = s.str.partition("_", expand=False) - expected = Series( - [("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h"), None] - ) + # more than one char + s = Series(["a__b__c", "c__d__e", np.nan, "f__g__h", None]) + result = getattr(s.str, method)("__", expand=False) + expected = Series(exp) tm.assert_series_equal(result, expected) - result = s.str.rpartition("_", expand=False) - expected = Series( - [("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h"), None] - ) - tm.assert_series_equal(result, expected) - # more than one char - s = Series(["a__b__c", "c__d__e", np.nan, "f__g__h", None]) - result = s.str.partition("__", expand=False) - expected = Series( +@pytest.mark.parametrize( + "method, exp", + [ [ - ("a", "__", "b__c"), - ("c", "__", "d__e"), - np.nan, - ("f", "__", "g__h"), - None, + "partition", + [("a", " ", "b c"), ("c", " ", "d e"), np.nan, ("f", " ", "g h"), None], ], - ) - tm.assert_series_equal(result, expected) - - result = s.str.rpartition("__", expand=False) - expected = Series( [ - ("a__b", "__", "c"), - ("c__d", "__", "e"), - np.nan, - ("f__g", "__", "h"), - None, + "rpartition", + [("a b", " ", "c"), ("c d", " ", "e"), np.nan, ("f g", " ", "h"), None], ], - ) - tm.assert_series_equal(result, expected) - + ], +) +def test_partition_series_none(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 # None s = Series(["a b c", "c d e", np.nan, "f g h", None], dtype=any_string_dtype) - result = s.str.partition(expand=False) - expected = Series( - [("a", " ", "b c"), ("c", " ", "d e"), np.nan, ("f", " ", "g h"), None] - ) + result = getattr(s.str, method)(expand=False) + expected = Series(exp) tm.assert_series_equal(result, expected) - result = s.str.rpartition(expand=False) - expected = Series( - [("a b", " ", "c"), ("c d", " ", "e"), np.nan, ("f g", " ", "h"), None] - ) - tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + [("abc", "", ""), ("cde", "", ""), np.nan, ("fgh", "", ""), None], + ], + [ + "rpartition", + [("", "", "abc"), ("", "", "cde"), np.nan, ("", "", "fgh"), None], + ], + ], +) +def test_partition_series_not_split(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 # Not split s = Series(["abc", "cde", np.nan, "fgh", None], dtype=any_string_dtype) - result = s.str.partition("_", expand=False) - expected = Series([("abc", "", ""), ("cde", "", ""), np.nan, ("fgh", "", ""), None]) + result = getattr(s.str, method)("_", expand=False) + expected = Series(exp) tm.assert_series_equal(result, expected) - result = s.str.rpartition("_", expand=False) - expected = Series([("", "", "abc"), ("", "", "cde"), np.nan, ("", "", "fgh"), None]) - tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + [("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h")], + ], + [ + "rpartition", + [("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h")], + ], + ], +) +def test_partition_series_unicode(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 # unicode s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) - result = s.str.partition("_", expand=False) - expected = Series([("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h")]) + result = getattr(s.str, method)("_", expand=False) + expected = Series(exp) tm.assert_series_equal(result, expected) - result = s.str.rpartition("_", expand=False) - expected = Series([("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h")]) - tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("method", ["partition", "rpartition"]) +def test_partition_series_stdlib(any_string_dtype, method): + # https://github.com/pandas-dev/pandas/issues/23558 # compare to standard lib s = Series(["A_B_C", "B_C_D", "E_F_G", "EFGHEF"], dtype=any_string_dtype) - result = s.str.partition("_", expand=False).tolist() - assert result == [v.partition("_") for v in s] - result = s.str.rpartition("_", expand=False).tolist() - assert result == [v.rpartition("_") for v in s] + result = getattr(s.str, method)("_", expand=False).tolist() + assert result == [getattr(v, method)("_") for v in s] -def test_partition_index(): +@pytest.mark.parametrize( + "method, expand, exp, exp_levels", + [ + [ + "partition", + False, + np.array( + [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None], + dtype=object, + ), + 1, + ], + [ + "rpartition", + False, + np.array( + [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None], + dtype=object, + ), + 1, + ], + ], +) +def test_partition_index(method, expand, exp, exp_levels): # https://github.com/pandas-dev/pandas/issues/23558 values = Index(["a_b_c", "c_d_e", "f_g_h", np.nan, None]) - result = values.str.partition("_", expand=False) - exp = Index( - np.array( - [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None], - dtype=object, - ) - ) + result = getattr(values.str, method)("_", expand=expand) + exp = Index(exp) tm.assert_index_equal(result, exp) - assert result.nlevels == 1 + assert result.nlevels == exp_levels - result = values.str.rpartition("_", expand=False) - exp = Index( - np.array( - [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None], - dtype=object, - ) - ) - tm.assert_index_equal(result, exp) - assert result.nlevels == 1 - result = values.str.partition("_") - exp = Index( +@pytest.mark.parametrize( + "method, exp", + [ [ - ("a", "_", "b_c"), - ("c", "_", "d_e"), - ("f", "_", "g_h"), - (np.nan, np.nan, np.nan), - (None, None, None), - ] - ) - tm.assert_index_equal(result, exp) - assert isinstance(result, MultiIndex) - assert result.nlevels == 3 - - result = values.str.rpartition("_") - exp = Index( + "partition", + { + 0: ["a", "c", np.nan, "f", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["b_c", "d_e", np.nan, "g_h", None], + }, + ], [ - ("a_b", "_", "c"), - ("c_d", "_", "e"), - ("f_g", "_", "h"), - (np.nan, np.nan, np.nan), - (None, None, None), - ] - ) - tm.assert_index_equal(result, exp) - assert isinstance(result, MultiIndex) - assert result.nlevels == 3 - - -def test_partition_to_dataframe(any_string_dtype): + "rpartition", + { + 0: ["a_b", "c_d", np.nan, "f_g", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["c", "e", np.nan, "h", None], + }, + ], + ], +) +def test_partition_to_dataframe(any_string_dtype, method, exp): # https://github.com/pandas-dev/pandas/issues/23558 s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None], dtype=any_string_dtype) - result = s.str.partition("_") + result = getattr(s.str, method)("_") expected = DataFrame( - { - 0: ["a", "c", np.nan, "f", None], - 1: ["_", "_", np.nan, "_", None], - 2: ["b_c", "d_e", np.nan, "g_h", None], - }, + exp, dtype=any_string_dtype, ) tm.assert_frame_equal(result, expected) - result = s.str.rpartition("_") - expected = DataFrame( - { - 0: ["a_b", "c_d", np.nan, "f_g", None], - 1: ["_", "_", np.nan, "_", None], - 2: ["c", "e", np.nan, "h", None], - }, - dtype=any_string_dtype, - ) - tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "method, exp", + [ + [ + "partition", + { + 0: ["a", "c", np.nan, "f", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["b_c", "d_e", np.nan, "g_h", None], + }, + ], + [ + "rpartition", + { + 0: ["a_b", "c_d", np.nan, "f_g", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["c", "e", np.nan, "h", None], + }, + ], + ], +) +def test_partition_to_dataframe_from_series(any_string_dtype, method, exp): + # https://github.com/pandas-dev/pandas/issues/23558 s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None], dtype=any_string_dtype) - result = s.str.partition("_", expand=True) + result = getattr(s.str, method)("_", expand=True) expected = DataFrame( - { - 0: ["a", "c", np.nan, "f", None], - 1: ["_", "_", np.nan, "_", None], - 2: ["b_c", "d_e", np.nan, "g_h", None], - }, - dtype=any_string_dtype, - ) - tm.assert_frame_equal(result, expected) - - result = s.str.rpartition("_", expand=True) - expected = DataFrame( - { - 0: ["a_b", "c_d", np.nan, "f_g", None], - 1: ["_", "_", np.nan, "_", None], - 2: ["c", "e", np.nan, "h", None], - }, + exp, dtype=any_string_dtype, ) tm.assert_frame_equal(result, expected) @@ -619,7 +628,11 @@ def test_partition_with_name(any_string_dtype): ) tm.assert_frame_equal(result, expected) + +def test_partition_with_name_expand(any_string_dtype): + # GH 12617 # should preserve name + s = Series(["a,b", "c,d"], name="xxx", dtype=any_string_dtype) result = s.str.partition(",", expand=False) expected = Series([("a", ",", "b"), ("c", ",", "d")], name="xxx") tm.assert_series_equal(result, expected) @@ -632,6 +645,9 @@ def test_partition_index_with_name(): assert result.nlevels == 3 tm.assert_index_equal(result, expected) + +def test_partition_index_with_name_expand_false(): + idx = Index(["a,b", "c,d"], name="xxx") # should preserve name result = idx.str.partition(",", expand=False) expected = Index(np.array([("a", ",", "b"), ("c", ",", "d")]), name="xxx") @@ -639,16 +655,13 @@ def test_partition_index_with_name(): tm.assert_index_equal(result, expected) -def test_partition_sep_kwarg(any_string_dtype): +@pytest.mark.parametrize("method", ["partition", "rpartition"]) +def test_partition_sep_kwarg(any_string_dtype, method): # GH 22676; depr kwarg "pat" in favor of "sep" s = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) - expected = s.str.partition(sep="_") - result = s.str.partition("_") - tm.assert_frame_equal(result, expected) - - expected = s.str.rpartition(sep="_") - result = s.str.rpartition("_") + expected = getattr(s.str, method)(sep="_") + result = getattr(s.str, method)("_") tm.assert_frame_equal(result, expected) @@ -666,30 +679,23 @@ def test_get_mixed_object(): tm.assert_series_equal(result, expected) -def test_get_bounds(): +@pytest.mark.parametrize("idx", [2, -3]) +def test_get_bounds(idx): ser = Series(["1_2_3_4_5", "6_7_8_9_10", "11_12"]) - - # positive index - result = ser.str.split("_").str.get(2) - expected = Series(["3", "8", np.nan]) - tm.assert_series_equal(result, expected) - - # negative index - result = ser.str.split("_").str.get(-3) + result = ser.str.split("_").str.get(idx) expected = Series(["3", "8", np.nan]) tm.assert_series_equal(result, expected) -def test_get_complex(): +@pytest.mark.parametrize( + "idx, exp", [[2, [3, 3, np.nan, "b"]], [-1, [3, 3, np.nan, np.nan]]] +) +def test_get_complex(idx, exp): # GH 20671, getting value not in dict raising `KeyError` ser = Series([(1, 2, 3), [1, 2, 3], {1, 2, 3}, {1: "a", 2: "b", 3: "c"}]) - result = ser.str.get(1) - expected = Series([2, 2, np.nan, "a"]) - tm.assert_series_equal(result, expected) - - result = ser.str.get(-1) - expected = Series([3, 3, np.nan, np.nan]) + result = ser.str.get(idx) + expected = Series(exp) tm.assert_series_equal(result, expected)