diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index febf08f2c47aa..1fde030d4b7a8 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -545,6 +545,7 @@ Other Deprecations - Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`) - A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`) - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) +- Deprecated passing ``set`` or ``dict`` as an indexer for :meth:`DataFrame.loc.__setitem__`, :meth:`DataFrame.loc.__getitem__`, :meth:`Series.loc.__setitem__`, :meth:`Series.loc.__getitem__`, :meth:`DataFrame.__getitem__`, :meth:`Series.__getitem__` and :meth:`Series.__setitem__` (:issue:`42825`) - Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`) - Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 03c9addefecc0..fd5163bcb14ed 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -170,6 +170,7 @@ ) from pandas.core.indexing import ( check_bool_indexer, + check_deprecated_indexers, convert_to_index_sliceable, ) from pandas.core.internals import ( @@ -3457,6 +3458,7 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]: yield self._get_column_array(i) def __getitem__(self, key): + check_deprecated_indexers(key) key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f043a8cee308c..19fbc43fea3b3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -641,6 +641,10 @@ def _get_setitem_indexer(self, key): if self.name == "loc": self._ensure_listlike_indexer(key) + if 
isinstance(key, tuple): + for x in key: + check_deprecated_indexers(x) + if self.axis is not None: return self._convert_tuple(key) @@ -698,6 +702,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): ) def __setitem__(self, key, value): + check_deprecated_indexers(key) if isinstance(key, tuple): key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) @@ -890,6 +895,9 @@ def _getitem_nested_tuple(self, tup: tuple): # we have a nested tuple so have at least 1 multi-index level # we should be able to match up the dimensionality here + for key in tup: + check_deprecated_indexers(key) + # we have too many indexers for our dim, but have at least 1 # multi-index dimension, try to see if we have something like # a tuple passed to a series with a multi-index @@ -943,6 +951,7 @@ def _convert_to_indexer(self, key, axis: int): raise AbstractMethodError(self) def __getitem__(self, key): + check_deprecated_indexers(key) if type(key) is tuple: key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) @@ -2444,3 +2453,29 @@ def need_slice(obj: slice) -> bool: or obj.stop is not None or (obj.step is not None and obj.step != 1) ) + + +def check_deprecated_indexers(key) -> None: + """Emit FutureWarning if key is a set/dict or a tuple directly containing one.""" + if ( + isinstance(key, set) + or isinstance(key, tuple) + and any(isinstance(x, set) for x in key) + ): + warnings.warn( + "Passing a set as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if ( + isinstance(key, dict) + or isinstance(key, tuple) + and any(isinstance(x, dict) for x in key) + ): + warnings.warn( + "Passing a dict as an indexer is deprecated and will raise in " + "a future version. 
Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 15805c0aa94ed..2c747b20e0a0a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -124,7 +124,10 @@ ensure_index, ) import pandas.core.indexes.base as ibase -from pandas.core.indexing import check_bool_indexer +from pandas.core.indexing import ( + check_bool_indexer, + check_deprecated_indexers, +) from pandas.core.internals import ( SingleArrayManager, SingleBlockManager, @@ -939,6 +942,7 @@ def _slice(self, slobj: slice, axis: int = 0) -> Series: return self._get_values(slobj) def __getitem__(self, key): + check_deprecated_indexers(key) key = com.apply_if_callable(key, self) if key is Ellipsis: @@ -1065,6 +1069,7 @@ def _get_value(self, label, takeable: bool = False): return self.index._get_values_for_loc(self, loc, label) def __setitem__(self, key, value) -> None: + check_deprecated_indexers(key) key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible() diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 3028a433f2dae..0d4ab84175aab 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -134,7 +134,11 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): idx = idx_type(keys) idx_check = list(idx_type(keys)) - result = frame[idx] + if isinstance(idx, (set, dict)): + with tm.assert_produces_warning(FutureWarning): + result = frame[idx] + else: + result = frame[idx] expected = frame.loc[:, idx_check] expected.columns.names = frame.columns.names @@ -143,7 +147,8 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): idx = idx_type(keys + [missing]) with pytest.raises(KeyError, match="not in index"): - frame[idx] + with tm.assert_produces_warning(FutureWarning): + frame[idx] def test_getitem_iloc_generator(self): # GH#39614 @@ 
-388,3 +393,14 @@ def test_getitem_datetime_slice(self): ), ) tm.assert_frame_equal(result, expected) + + +class TestGetitemDeprecatedIndexers: + @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825: a set or dict key passed to DataFrame.__getitem__ warns + df = DataFrame( + [[1, 2], [3, 4]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)]) + ) + with tm.assert_produces_warning(FutureWarning): + df[key] diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 2194fb4d5b1bd..bee8ccb125315 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1526,3 +1526,65 @@ def test_loc_iloc_setitem_non_categorical_rhs( # "c" not part of the categories with pytest.raises(TypeError, match=msg1): indexer(df)[key] = ["c", "c"] + + +class TestDeprecatedIndexers: + @pytest.mark.parametrize( + "key", [{1}, {1: 1}, ({1}, "a"), ({1: 1}, "a"), (1, {"a"}), (1, {"a": "a"})] + ) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825: a set/dict key, alone or inside a tuple, warns in .loc + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] + + @pytest.mark.parametrize( + "key", + [ + {1}, + {1: 1}, + (({1}, 2), "a"), + (({1: 1}, 2), "a"), + ((1, 2), {"a"}), + ((1, 2), {"a": "a"}), + ], + ) + def test_getitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825: also warns when nested inside a MultiIndex row key + df = DataFrame( + [[1, 2], [3, 4]], + columns=["a", "b"], + index=MultiIndex.from_tuples([(1, 2), (3, 4)]), + ) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] + + @pytest.mark.parametrize( + "key", [{1}, {1: 1}, ({1}, "a"), ({1: 1}, "a"), (1, {"a"}), (1, {"a": "a"})] + ) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825: the same keys warn when assigning through .loc + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] = 1 + + @pytest.mark.parametrize( + "key", + [ + {1}, + {1: 1}, + (({1}, 2), "a"), + (({1: 1}, 2), "a"), 
+ ((1, 2), {"a"}), + ((1, 2), {"a": "a"}), + ], + ) + def test_setitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825: assignment with a nested MultiIndex row key warns + df = DataFrame( + [[1, 2], [3, 4]], + columns=["a", "b"], + index=MultiIndex.from_tuples([(1, 2), (3, 4)]), + ) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] = 1 diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 1756cc3ae707c..6e59311634c76 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -339,8 +339,11 @@ def convert_nested_indexer(indexer_type, keys): convert_nested_indexer(indexer_type, k) for indexer_type, k in zip(types, keys) ) - - result = df.loc[indexer, "Data"] + if indexer_type_1 is set or indexer_type_2 is set: + with tm.assert_produces_warning(FutureWarning): + result = df.loc[indexer, "Data"] + else: + result = df.loc[indexer, "Data"] expected = Series( [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys) ) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index dabe3d480ff19..0da376ccac450 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -696,3 +696,19 @@ def test_duplicated_index_getitem_positional_indexer(index_vals): s = Series(range(5), index=list(index_vals)) result = s[3] assert result == 3 + + +class TestGetitemDeprecatedIndexers: + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825: a set or dict key passed to Series.__getitem__ warns + ser = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + ser[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825: a set or dict key passed to Series.__setitem__ warns + ser = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + ser[key] = 1 diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 
31c21e123a0de..ff0a4ae1b5564 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -8,6 +8,7 @@ from pandas import ( DataFrame, IndexSlice, + MultiIndex, Series, Timedelta, Timestamp, @@ -316,3 +317,33 @@ def test_frozenset_index(): assert s[idx1] == 2 s[idx1] = 3 assert s[idx1] == 3 + + +class TestDeprecatedIndexers: + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825: a set or dict key passed to Series.loc warns + ser = Series([1, 2]) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)]) + def test_getitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825: also warns for a set/dict inside a MultiIndex tuple key + ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825: the same keys warn when assigning through Series.loc + ser = Series([1, 2]) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] = 1 + + @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)]) + def test_setitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825: assignment with a set/dict inside a tuple key warns + ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] = 1