From c2b9380b4c73a5087a0c54a27fb3feb71ad273ac Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 7 Nov 2020 18:04:40 -0800 Subject: [PATCH 1/4] TST/REF: Collect indexing tests by method --- pandas/tests/indexing/test_iloc.py | 10 +- pandas/tests/indexing/test_loc.py | 86 +++++++++++++++ pandas/tests/indexing/test_timedelta.py | 106 ------------------- pandas/tests/series/indexing/test_setitem.py | 9 ++ 4 files changed, 104 insertions(+), 107 deletions(-) delete mode 100644 pandas/tests/indexing/test_timedelta.py diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f8dfda3dab486..47e161833bca5 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -7,7 +7,7 @@ import pytest import pandas as pd -from pandas import CategoricalDtype, DataFrame, Series, concat, date_range, isna +from pandas import CategoricalDtype, DataFrame, NaT, Series, concat, date_range, isna import pandas._testing as tm from pandas.api.types import is_scalar from pandas.core.indexing import IndexingError @@ -769,6 +769,14 @@ def test_iloc_getitem_categorical_values(self): expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("value", [None, NaT, np.nan]) + def test_iloc_setitem_td64_values_cast_na(self, value): + # GH#18586 + series = Series([0, 1, 2], dtype="timedelta64[ns]") + series.iloc[0] = value + expected = pd.Series([NaT, 1, 2], dtype="timedelta64[ns]") + tm.assert_series_equal(series, expected) + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6939b280a988b..dcbf08283f254 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -22,6 +22,7 @@ date_range, timedelta_range, to_datetime, + to_timedelta, ) import pandas._testing as tm from pandas.api.types import is_scalar @@ -1099,6 +1100,32 @@ def test_loc_setitem_int_label_with_float64index(self): tm.assert_series_equal(ser, tmp) + @pytest.mark.parametrize( + "indexer, expected", + [ + # The test name is a misnomer in the 0 case as df.index[indexer] + # is a scalar. + (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), + ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]), + ], + ) + def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): + # GH#16637 + tdi = to_timedelta(range(10), unit="s") + df = DataFrame({"x": range(10)}, dtype="int64", index=tdi) + + df.loc[df.index[indexer], "x"] = 20 + + expected = DataFrame( + expected, + index=tdi, + columns=["x"], + dtype="int64", + ) + + tm.assert_frame_equal(expected, df) + class TestLocWithMultiIndex: @pytest.mark.parametrize( @@ -1453,6 +1480,13 @@ def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): tm.assert_series_equal(result, expected) + def test_loc_getitem_str_timedeltaindex(self): + # GH#16896 + df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days")) + expected = df.iloc[0] + sliced = df.loc["0 days"] + tm.assert_series_equal(sliced, expected) + class TestLabelSlicing: def test_loc_getitem_label_slice_across_dst(self): @@ -1513,8 +1547,45 @@ def test_loc_getitem_float_slice_float64index(self): assert len(ser.loc[12.0:]) == 8 assert len(ser.loc[12.5:]) == 7 + @pytest.mark.parametrize( + "start,stop, expected_slice", + [ + [np.timedelta64(0, "ns"), None, slice(0, 11)], + [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)], + [None, np.timedelta64(4, "D"), slice(0, 5)], + ], + ) + def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice): + # GH#20393 + ser = Series(range(11), timedelta_range("0 days", "10 days")) + result = ser.loc[slice(start, stop)] + expected = ser.iloc[expected_slice] + tm.assert_series_equal(result, expected) + class TestLocBooleanMask: + def test_loc_setitem_bool_mask_timedeltaindex(self): + # GH#14946 + df = DataFrame({"x": range(10)}) + df.index = to_timedelta(range(10), unit="s") + conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3] + expected_data = [ + [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], + [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], + [10, 10, 10, 3, 4, 5, 6, 7, 8, 9], + ] + for cond, data in zip(conditions, expected_data): + result = df.copy() + result.loc[cond, "x"] = 10 + + expected = DataFrame( + data, + index=to_timedelta(range(10), unit="s"), + columns=["x"], + dtype="int64", + ) + tm.assert_frame_equal(expected, result) + def test_loc_setitem_mask_with_datetimeindex_tz(self): # GH#16889 # support .loc with alignment and tz-aware DatetimeIndex @@ -1557,6 +1628,21 @@ def test_loc_setitem_mask_and_label_with_datetimeindex(self): df.loc[mask, "C"] = df.loc[mask].index tm.assert_frame_equal(df, expected) + def test_loc_setitem_mask_td64_series_value(self): + # GH#23462 key list of bools, value is a Series + td1 = Timedelta(0) + td2 = Timedelta(28767471428571405) + df = DataFrame({"col": Series([td1, td2])}) + df_copy = df.copy() + ser = Series([td1]) + + expected = df["col"].iloc[1].value + df.loc[[True, False]] = ser + result = df["col"].iloc[1].value + + assert expected == result + tm.assert_frame_equal(df, df_copy) + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py deleted file mode 100644 index 9461bb74b2a87..0000000000000 --- a/pandas/tests/indexing/test_timedelta.py +++ /dev/null @@ -1,106 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -import pandas._testing as tm - - -class TestTimedeltaIndexing: - def test_loc_setitem_bool_mask(self): - # GH 14946 - df = pd.DataFrame({"x": range(10)}) - df.index = pd.to_timedelta(range(10), unit="s") - conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3] - expected_data = [ - [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], - [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], - [10, 10, 10, 3, 4, 5, 6, 7, 8, 9], - ] - for cond, data in zip(conditions, expected_data): - result = df.copy() - result.loc[cond, "x"] = 10 - - expected = pd.DataFrame( - data, - index=pd.to_timedelta(range(10), unit="s"), - columns=["x"], - dtype="int64", - ) - tm.assert_frame_equal(expected, result) - - @pytest.mark.parametrize( - "indexer, expected", - [ - (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), - ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]), - ], - ) - def test_list_like_indexing(self, indexer, expected): - # GH 16637 - df = pd.DataFrame({"x": range(10)}, dtype="int64") - df.index = pd.to_timedelta(range(10), unit="s") - - df.loc[df.index[indexer], "x"] = 20 - - expected = pd.DataFrame( - expected, - index=pd.to_timedelta(range(10), unit="s"), - columns=["x"], - dtype="int64", - ) - - tm.assert_frame_equal(expected, df) - - def test_string_indexing(self): - # GH 16896 - df = pd.DataFrame({"x": range(3)}, index=pd.to_timedelta(range(3), unit="days")) - expected = df.iloc[0] - sliced = df.loc["0 days"] - tm.assert_series_equal(sliced, expected) - - @pytest.mark.parametrize("value", [None, pd.NaT, np.nan]) - def test_setitem_mask_na_value_td64(self, value): - # issue (#18586) - series = pd.Series([0, 1, 2], dtype="timedelta64[ns]") - series[series == series[0]] = value - expected = pd.Series([pd.NaT, 1, 2], dtype="timedelta64[ns]") - tm.assert_series_equal(series, expected) - - @pytest.mark.parametrize("value", [None, pd.NaT, np.nan]) - def test_listlike_setitem(self, value): - # issue (#18586) - series = pd.Series([0, 1, 2], dtype="timedelta64[ns]") - series.iloc[0] = value - expected = pd.Series([pd.NaT, 1, 2], dtype="timedelta64[ns]") - tm.assert_series_equal(series, expected) - - @pytest.mark.parametrize( - "start,stop, expected_slice", - [ - [np.timedelta64(0, "ns"), None, slice(0, 11)], - [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)], - [None, np.timedelta64(4, "D"), slice(0, 5)], - ], - ) - def test_numpy_timedelta_scalar_indexing(self, start, stop, expected_slice): - # GH 20393 - s = pd.Series(range(11), pd.timedelta_range("0 days", "10 days")) - result = s.loc[slice(start, stop)] - expected = s.iloc[expected_slice] - tm.assert_series_equal(result, expected) - - def test_roundtrip_thru_setitem(self): - # PR 23462 - dt1 = pd.Timedelta(0) - dt2 = pd.Timedelta(28767471428571405) - df = pd.DataFrame({"dt": pd.Series([dt1, dt2])}) - df_copy = df.copy() - s = pd.Series([dt1]) - - expected = df["dt"].iloc[1].value - df.loc[[True, False]] = s - result = df["dt"].iloc[1].value - - assert expected == result - tm.assert_frame_equal(df, df_copy) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 7e25e5200d610..4f00f9c931d6a 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -126,6 +126,15 @@ def test_setitem_boolean_different_order(self, string_series): tm.assert_series_equal(copy, expected) + @pytest.mark.parametrize("value", [None, NaT, np.nan]) + def test_setitem_boolean_td64_values_cast_na(self, value): + # GH#18586 + series = Series([0, 1, 2], dtype="timedelta64[ns]") + mask = series == series[0] + series[mask] = value + expected = Series([NaT, 1, 2], dtype="timedelta64[ns]") + tm.assert_series_equal(series, expected) + class TestSetitemViewCopySemantics: def test_setitem_invalidates_datetime_index_freq(self): From 20a89b04a6fee1616db93cd938bcfd5dbe4363c9 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 7 Nov 2020 20:27:38 -0800 Subject: [PATCH 2/4] TST/REF: collect indexing tests by method, de-duplicate --- pandas/tests/indexing/test_at.py | 18 +++++ pandas/tests/indexing/test_iloc.py | 20 ++++- pandas/tests/indexing/test_indexing.py | 103 ++++++++----------------- 3 files changed, 68 insertions(+), 73 deletions(-) diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index 46299fadf7789..d410a4137554b 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -17,6 +17,16 @@ def test_at_timezone(): tm.assert_frame_equal(result, expected) +class TestAtSetItem: + def test_at_setitem_mixed_index_assignment(self): + # GH#19860 + ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + ser.at["a"] = 11 + assert ser.iat[0] == 11 + ser.at[1] = 22 + assert ser.iat[3] == 22 + + class TestAtWithDuplicates: def test_at_with_duplicate_axes_requires_scalar_lookup(self): # GH#33041 check that falling back to loc doesn't allow non-scalar @@ -108,3 +118,11 @@ def test_at_frame_raises_key_error2(self): df.at["a", 0] with pytest.raises(KeyError, match="^0$"): df.loc["a", 0] + + def test_at_getitem_mixed_index_no_fallback(self): + # GH#19860 + ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + with pytest.raises(KeyError, match="^0$"): + ser.at[0] + with pytest.raises(KeyError, match="^4$"): + ser.at[4] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index bd88e3793b8e3..fd1d5e4cedd65 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -7,7 +7,16 @@ import pytest import pandas as pd -from pandas import CategoricalDtype, DataFrame, NaT, Series, concat, date_range, isna +from pandas import ( + CategoricalDtype, + DataFrame, + Index, + NaT, + Series, + concat, + date_range, + isna, +) import pandas._testing as tm from pandas.api.types import is_scalar from pandas.core.indexing import IndexingError @@ -776,6 +785,15 @@ def test_iloc_setitem_td64_values_cast_na(self, value): expected = pd.Series([NaT, 1, 2], dtype="timedelta64[ns]") tm.assert_series_equal(series, expected) + def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): + idx = Index([]) + obj = DataFrame(np.random.randn(len(idx), len(idx)), index=idx, columns=idx) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msg = f"Cannot set values with ndim > {obj.ndim}" + with pytest.raises(ValueError, match=msg): + obj.iloc[nd3] = 0 + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 06bd8a5f300bb..472b29981e78c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -15,6 +15,8 @@ from pandas.core.indexing import maybe_numeric_slice, non_reducing_slice from pandas.tests.indexing.common import _mklbl +from .test_floats import gen_obj + # ------------------------------------------------------------------------ # Indexing test cases @@ -56,14 +58,6 @@ def test_setitem_ndarray_1d(self): with pytest.raises(ValueError, match=msg): df[2:5] = np.arange(1, 4) * 1j - @pytest.mark.parametrize( - "obj", - [ - lambda i: Series(np.arange(len(i)), index=i), - lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), - ], - ids=["Series", "DataFrame"], - ) @pytest.mark.parametrize( "idxr, idxr_id", [ @@ -72,9 +66,9 @@ def test_setitem_ndarray_1d(self): (lambda x: x.iloc, "iloc"), ], ) - def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): + def test_getitem_ndarray_3d(self, index, frame_or_series, idxr, idxr_id): # GH 25567 - obj = obj(index) + obj = gen_obj(frame_or_series, index) idxr = idxr(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) @@ -94,14 +88,6 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): idxr[nd3] - @pytest.mark.parametrize( - "obj", - [ - lambda i: Series(np.arange(len(i)), index=i), - lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), - ], - ids=["Series", "DataFrame"], - ) @pytest.mark.parametrize( "idxr, idxr_id", [ @@ -110,9 +96,9 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): (lambda x: x.iloc, "iloc"), ], ) - def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): + def test_setitem_ndarray_3d(self, index, frame_or_series, idxr, idxr_id): # GH 25567 - obj = obj(index) + obj = gen_obj(frame_or_series, index) idxr = idxr(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) @@ -135,15 +121,6 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): with pytest.raises(err, match=msg): idxr[nd3] = 0 - def test_setitem_ndarray_3d_does_not_fail_for_iloc_empty_dataframe(self): - i = Index([]) - obj = DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i) - nd3 = np.random.randint(5, size=(2, 2, 2)) - - msg = f"Cannot set values with ndim > {obj.ndim}" - with pytest.raises(ValueError, match=msg): - obj.iloc[nd3] = 0 - def test_inf_upcast(self): # GH 16957 # We should be able to use np.inf as a key @@ -617,7 +594,8 @@ def test_astype_assignment(self): expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) - def test_index_type_coercion(self): + @pytest.mark.parametrize("indexer", [lambda x: x.loc, lambda x: x]) + def test_index_type_coercion(self, indexer): # GH 11836 # if we have an index type and set it with something that looks @@ -630,41 +608,38 @@ def test_index_type_coercion(self): assert s.index.is_integer() - for indexer in [lambda x: x.loc, lambda x: x]: - s2 = s.copy() - indexer(s2)[0.1] = 0 - assert s2.index.is_floating() - assert indexer(s2)[0.1] == 0 + s2 = s.copy() + indexer(s2)[0.1] = 0 + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 - s2 = s.copy() - indexer(s2)[0.0] = 0 - exp = s.index - if 0 not in s: - exp = Index(s.index.tolist() + [0]) - tm.assert_index_equal(s2.index, exp) + s2 = s.copy() + indexer(s2)[0.0] = 0 + exp = s.index + if 0 not in s: + exp = Index(s.index.tolist() + [0]) + tm.assert_index_equal(s2.index, exp) - s2 = s.copy() - indexer(s2)["0"] = 0 - assert s2.index.is_object() + s2 = s.copy() + indexer(s2)["0"] = 0 + assert s2.index.is_object() for s in [Series(range(5), index=np.arange(5.0))]: assert s.index.is_floating() - for idxr in [lambda x: x.loc, lambda x: x]: - - s2 = s.copy() - idxr(s2)[0.1] = 0 - assert s2.index.is_floating() - assert idxr(s2)[0.1] == 0 + s2 = s.copy() + indexer(s2)[0.1] = 0 + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 - s2 = s.copy() - idxr(s2)[0.0] = 0 - tm.assert_index_equal(s2.index, s.index) + s2 = s.copy() + indexer(s2)[0.0] = 0 + tm.assert_index_equal(s2.index, s.index) - s2 = s.copy() - idxr(s2)["0"] = 0 - assert s2.index.is_object() + s2 = s.copy() + indexer(s2)["0"] = 0 + assert s2.index.is_object() class TestMisc: @@ -693,22 +668,6 @@ def test_float_index_at_iat(self): for i in range(len(s)): assert s.iat[i] == i + 1 - def test_mixed_index_assignment(self): - # GH 19860 - s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) - s.at["a"] = 11 - assert s.iat[0] == 11 - s.at[1] = 22 - assert s.iat[3] == 22 - - def test_mixed_index_no_fallback(self): - # GH 19860 - s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) - with pytest.raises(KeyError, match="^0$"): - s.at[0] - with pytest.raises(KeyError, match="^4$"): - s.at[4] - def test_rhs_alignment(self): # GH8258, tests that both rows & columns are aligned to what is # assigned to. covers both uniform data-type & multi-type cases From f30a030559d884ed7249d9ac28b1ac7b863fe8d8 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 8 Nov 2020 11:35:35 -0800 Subject: [PATCH 3/4] parametrize --- pandas/tests/indexing/test_floats.py | 26 +++++++++++--------------- pandas/tests/indexing/test_scalar.py | 2 +- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index c48e0a129e161..da6a411864e9e 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -69,15 +69,14 @@ def test_scalar_error(self, series_with_simple_index): tm.makePeriodIndex, ], ) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_scalar_non_numeric(self, index_func, klass): + def test_scalar_non_numeric(self, index_func, frame_or_series): # GH 4892 # float_indexers should raise exceptions # on appropriate Index types & accessors i = index_func(5) - s = gen_obj(klass, i) + s = gen_obj(frame_or_series, i) # getting with pytest.raises(KeyError, match="^3.0$"): @@ -103,6 +102,7 @@ def test_scalar_non_numeric(self, index_func, klass): pass elif s.index.inferred_type in ["datetime64", "timedelta64", "period"]: + # FIXME: dont leave commented-out # these should prob work # and are inconsistent between series/dataframe ATM # for idxr in [lambda x: x]: @@ -182,14 +182,13 @@ def test_scalar_with_mixed(self): assert result == expected @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_scalar_integer(self, index_func, klass): + def test_scalar_integer(self, index_func, frame_or_series): # test how scalar float indexers work on int indexes # integer index i = index_func(5) - obj = gen_obj(klass, i) + obj = gen_obj(frame_or_series, i) # coerce to equal int for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: @@ -226,12 +225,11 @@ def compare(x, y): # coerce to equal int assert 3.0 in obj - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_scalar_float(self, klass): + def test_scalar_float(self, frame_or_series): # scalar float indexers work on a float index index = Index(np.arange(5.0)) - s = gen_obj(klass, index) + s = gen_obj(frame_or_series, index) # assert all operations except for iloc are ok indexer = index[3] @@ -281,15 +279,14 @@ def test_scalar_float(self, klass): ], ) @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_slice_non_numeric(self, index_func, l, klass): + def test_slice_non_numeric(self, index_func, l, frame_or_series): # GH 4892 # float_indexers should raise exceptions # on appropriate Index types & accessors index = index_func(5) - s = gen_obj(klass, index) + s = gen_obj(frame_or_series, index) # getitem msg = ( @@ -509,12 +506,11 @@ def test_float_slice_getitem_with_integer_index_raises(self, l, index_func): s[l] @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_slice_float(self, l, klass): + def test_slice_float(self, l, frame_or_series): # same as above, but for floats index = Index(np.arange(5.0)) + 0.1 - s = gen_obj(klass, index) + s = gen_obj(frame_or_series, index) expected = s.iloc[3:4] for idxr in [lambda x: x.loc, lambda x: x]: diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 127d00c217a15..230725d8ee11d 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -178,7 +178,7 @@ def test_at_with_tz(self): result = df.at[0, "date"] assert result == expected - def test_series_set_tz_timestamp(self, tz_naive_fixture): + def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture): # GH 25506 ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture) result = Series(ts) From 21c63044ff0e034b19bfe21d4d7a0eeadb83c3f7 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 9 Nov 2020 16:11:19 -0800 Subject: [PATCH 4/4] TST/REF: collect indexing tests by method --- pandas/tests/frame/indexing/test_getitem.py | 17 +++++++ pandas/tests/frame/indexing/test_sparse.py | 19 -------- pandas/tests/indexing/test_floats.py | 49 --------------------- pandas/tests/indexing/test_iloc.py | 48 ++++++++++++++++---- 4 files changed, 56 insertions(+), 77 deletions(-) delete mode 100644 pandas/tests/frame/indexing/test_sparse.py diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 079cc12389835..2e65770d7afad 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -7,11 +7,13 @@ CategoricalIndex, DataFrame, MultiIndex, + Series, Timestamp, get_dummies, period_range, ) import pandas._testing as tm +from pandas.core.arrays import SparseArray class TestGetitem: @@ -50,6 +52,21 @@ def test_getitem_list_of_labels_categoricalindex_cols(self): result = dummies[list(dummies.columns)] tm.assert_frame_equal(result, expected) + def test_getitem_sparse_column_return_type_and_dtype(self): + # https://github.com/pandas-dev/pandas/issues/23559 + data = SparseArray([0, 1]) + df = DataFrame({"A": data}) + expected = Series(data, name="A") + result = df["A"] + tm.assert_series_equal(result, expected) + + # Also check iloc and loc while we're here + result = df.iloc[:, 0] + tm.assert_series_equal(result, expected) + + result = df.loc[:, "A"] + tm.assert_series_equal(result, expected) + class TestGetitemCallable: def test_getitem_callable(self, float_frame): diff --git a/pandas/tests/frame/indexing/test_sparse.py b/pandas/tests/frame/indexing/test_sparse.py deleted file mode 100644 index 47e4ae1f9f9e1..0000000000000 --- a/pandas/tests/frame/indexing/test_sparse.py +++ /dev/null @@ -1,19 +0,0 @@ -import pandas as pd -import pandas._testing as tm -from pandas.arrays import SparseArray - - -class TestSparseDataFrameIndexing: - def test_getitem_sparse_column(self): - # https://github.com/pandas-dev/pandas/issues/23559 - data = SparseArray([0, 1]) - df = pd.DataFrame({"A": data}) - expected = pd.Series(data, name="A") - result = df["A"] - tm.assert_series_equal(result, expected) - - result = df.iloc[:, 0] - tm.assert_series_equal(result, expected) - - result = df.loc[:, "A"] - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index da6a411864e9e..1b78ba6defd69 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -1,17 +1,9 @@ -import re - import numpy as np import pytest from pandas import DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series import pandas._testing as tm -# We pass through the error message from numpy -_slice_iloc_msg = re.escape( - "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) " - "and integer or boolean arrays are valid indices" -) - def gen_obj(klass, index): if klass is Series: @@ -40,24 +32,6 @@ def check(self, result, original, indexer, getitem): tm.assert_almost_equal(result, expected) - def test_scalar_error(self, series_with_simple_index): - - # GH 4892 - # float_indexers should raise exceptions - # on appropriate Index types & accessors - # this duplicates the code below - # but is specifically testing for the error - # message - - s = series_with_simple_index - - msg = "Cannot index by location index with a non-integer key" - with pytest.raises(TypeError, match=msg): - s.iloc[3.0] - - with pytest.raises(IndexError, match=_slice_iloc_msg): - s.iloc[3.0] = 0 - @pytest.mark.parametrize( "index_func", [ @@ -82,20 +56,12 @@ def test_scalar_non_numeric(self, index_func, frame_or_series): with pytest.raises(KeyError, match="^3.0$"): s[3.0] - msg = "Cannot index by location index with a non-integer key" - with pytest.raises(TypeError, match=msg): - s.iloc[3.0] - with pytest.raises(KeyError, match="^3.0$"): s.loc[3.0] # contains assert 3.0 not in s - # setting with a float fails with iloc - with pytest.raises(IndexError, match=_slice_iloc_msg): - s.iloc[3.0] = 0 - # setting with an indexer if s.index.inferred_type in ["categorical"]: # Value or Type Error @@ -151,10 +117,6 @@ def test_scalar_with_mixed(self): with pytest.raises(KeyError, match="^1.0$"): s2[1.0] - msg = "Cannot index by location index with a non-integer key" - with pytest.raises(TypeError, match=msg): - s2.iloc[1.0] - with pytest.raises(KeyError, match=r"^1\.0$"): s2.loc[1.0] @@ -171,9 +133,6 @@ def test_scalar_with_mixed(self): expected = 2 assert result == expected - msg = "Cannot index by location index with a non-integer key" - with pytest.raises(TypeError, match=msg): - s3.iloc[1.0] with pytest.raises(KeyError, match=r"^1\.0$"): s3.loc[1.0] @@ -260,14 +219,6 @@ def test_scalar_float(self, frame_or_series): result = s2.iloc[3] self.check(result, s, 3, False) - # iloc raises with a float - msg = "Cannot index by location index with a non-integer key" - with pytest.raises(TypeError, match=msg): - s.iloc[3.0] - - with pytest.raises(IndexError, match=_slice_iloc_msg): - s2.iloc[3.0] = 0 - @pytest.mark.parametrize( "index_func", [ diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index fd1d5e4cedd65..0360d7e01e62d 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1,13 +1,14 @@ """ test positional based indexing with iloc """ from datetime import datetime +import re from warnings import catch_warnings, simplefilter import numpy as np import pytest -import pandas as pd from pandas import ( + Categorical, CategoricalDtype, DataFrame, Index, @@ -22,6 +23,12 @@ from pandas.core.indexing import IndexingError from pandas.tests.indexing.common import Base +# We pass through the error message from numpy +_slice_iloc_msg = re.escape( + "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean arrays are valid indices" +) + class TestiLoc(Base): def test_iloc_getitem_int(self): @@ -59,7 +66,7 @@ class TestiLoc2: def test_is_scalar_access(self): # GH#32085 index with duplicates doesnt matter for _is_scalar_access - index = pd.Index([1, 2, 1]) + index = Index([1, 2, 1]) ser = Series(range(3), index=index) assert ser.iloc._is_scalar_access((1,)) @@ -346,7 +353,7 @@ def test_iloc_setitem_pandas_object(self): tm.assert_series_equal(s, expected) s = s_orig.copy() - s.iloc[pd.Index([1, 2])] = [-1, -2] + s.iloc[Index([1, 2])] = [-1, -2] tm.assert_series_equal(s, expected) def test_iloc_setitem_dups(self): @@ -719,13 +726,13 @@ def test_series_indexing_zerodim_np_array(self): @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457") def test_iloc_setitem_categorical_updates_inplace(self): # Mixed dtype ensures we go through take_split_path in setitem_with_indexer - cat = pd.Categorical(["A", "B", "C"]) + cat = Categorical(["A", "B", "C"]) df = DataFrame({1: cat, 2: [1, 2, 3]}) # This should modify our original values in-place df.iloc[:, 0] = cat[::-1] - expected = pd.Categorical(["C", "B", "A"]) + expected = Categorical(["C", "B", "A"]) tm.assert_categorical_equal(cat, expected) def test_iloc_with_boolean_operation(self): @@ -749,9 +756,9 @@ def test_iloc_with_boolean_operation(self): def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self): # GH#29521 - df = DataFrame({"x": pd.Categorical("a b c d e".split())}) + df = DataFrame({"x": Categorical("a b c d e".split())}) result = df.iloc[0] - raw_cat = pd.Categorical(["a"], categories=["a", "b", "c", "d", "e"]) + raw_cat = Categorical(["a"], categories=["a", "b", "c", "d", "e"]) expected = Series(raw_cat, index=["x"], name=0, dtype="category") tm.assert_series_equal(result, expected) @@ -782,7 +789,7 @@ def test_iloc_setitem_td64_values_cast_na(self, value): # GH#18586 series = Series([0, 1, 2], dtype="timedelta64[ns]") series.iloc[0] = value - expected = pd.Series([NaT, 1, 2], dtype="timedelta64[ns]") + expected = Series([NaT, 1, 2], dtype="timedelta64[ns]") tm.assert_series_equal(series, expected) def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): @@ -795,12 +802,35 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): obj.iloc[nd3] = 0 +class TestILocErrors: + # NB: this test should work for _any_ Series we can pass as + # series_with_simple_index + def test_iloc_float_raises(self, series_with_simple_index, frame_or_series): + # GH#4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + # this duplicates the code below + # but is specifically testing for the error + # message + + obj = series_with_simple_index + if frame_or_series is DataFrame: + obj = obj.to_frame() + + msg = "Cannot index by location index with a non-integer key" + with pytest.raises(TypeError, match=msg): + obj.iloc[3.0] + + with pytest.raises(IndexError, match=_slice_iloc_msg): + obj.iloc[3.0] = 0 + + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): # GH#15686, duplicate columns and mixed dtype df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) - df = pd.concat([df1, df2], axis=1) + df = concat([df1, df2], axis=1) df.iloc[0, 0] = -1 assert df.iloc[0, 0] == -1