From ea198b857eed7f9346fcee85e7a88e39325607d6 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 6 Feb 2021 11:54:20 +0100 Subject: [PATCH 1/7] BUG: Series.loc raising KeyError for Iterator indexer in case of setitem --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/indexing.py | 3 +++ pandas/tests/indexing/test_loc.py | 12 ++++++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 17d8c79994dbe..9bddaa6a0084f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -334,6 +334,7 @@ Indexing - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` with empty :class:`DataFrame` and specified columns for string indexer and non empty :class:`DataFrame` to set (:issue:`38831`) - Bug in :meth:`DataFrame.loc.__setitem__` raising ValueError when expanding unique column for :class:`DataFrame` with duplicate columns (:issue:`38521`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) +- Bug in :meth:`Series.loc.__setitem__` and :meth:`DataFrame.loc.__setitem__` raising ``KeyError`` for boolean Iterator indexer (:issue:`39614`) - Bug in :meth:`DataFrame.__setitem__` not raising ``ValueError`` when right hand side is a :class:`DataFrame` with wrong number of columns (:issue:`38604`) - Bug in :meth:`Series.__setitem__` raising ``ValueError`` when setting a :class:`Series` with a scalar indexer (:issue:`38303`) - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cc7c5f666feda..697c30a92001e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1217,6 +1217,9 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): elif is_list_like_indexer(key): + if is_iterator(key): + key = list(key) + if com.is_bool_indexer(key): key = check_bool_indexer(labels, key) (inds,) = key.nonzero() diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 1cd352e4e0899..9bc8905ddb5b8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2148,3 +2148,15 @@ def test_loc_series_getitem_too_many_dimensions(self, indexer): with pytest.raises(ValueError, match=msg): ser.loc[indexer, :] = 1 + + def test_loc_generator(self, frame_or_series): + # GH#39614 + obj = frame_or_series([1, 2, 3]) + indexer = [True, False, False] + result = obj.loc[reversed(indexer)] + expected = frame_or_series([3], index=[2]) + tm.assert_equal(result, expected) + + obj.loc[reversed(indexer)] = 5 + expected = frame_or_series([1, 2, 5]) + tm.assert_equal(obj, expected) From 00a9b958ebeb46e6b4447c02c79b7522d325804a Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 13 Feb 2021 21:46:31 +0100 Subject: [PATCH 2/7] Move test --- pandas/tests/indexing/test_loc.py | 12 ------------ pandas/tests/series/indexing/test_setitem.py | 8 ++++++++ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 9bc8905ddb5b8..1cd352e4e0899 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2148,15 +2148,3 @@ def test_loc_series_getitem_too_many_dimensions(self, indexer): with pytest.raises(ValueError, match=msg): ser.loc[indexer, :] = 1 - - def test_loc_generator(self, frame_or_series): - # GH#39614 - obj = frame_or_series([1, 2, 3]) - indexer = [True, False, False] - result = obj.loc[reversed(indexer)] - expected = frame_or_series([3], index=[2]) - tm.assert_equal(result, expected) - - obj.loc[reversed(indexer)] = 5 - expected = frame_or_series([1, 2, 5]) - tm.assert_equal(obj, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 767b61e31698b..5f09f64a61bd1 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -360,6 +360,10 @@ def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace): indkey = np.array(ilkey) self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) + if indexer_sli is not tm.iloc: + genkey = (x for x in [key]) + self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) + def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): if not isinstance(key, slice): return @@ -374,6 +378,10 @@ def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): indkey = np.array(ilkey) self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) + if indexer_sli is not tm.iloc: + genkey = (x for x in indkey) + self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) + def test_mask_key(self, obj, key, expected, val, indexer_sli): # setitem with boolean mask mask = np.zeros(obj.shape, dtype=bool) From df248cffd71600bf6afe54094a411875fe5437b0 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 13 Feb 2021 22:05:48 +0100 Subject: [PATCH 3/7] Regression in to_excel when setting duplicate column names --- doc/source/whatsnew/v1.2.2.rst | 2 +- pandas/io/formats/excel.py | 2 +- pandas/tests/io/excel/test_writers.py | 9 +++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index 974f84d3b244a..31f63416428ea 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -22,7 +22,7 @@ Fixed regressions - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`) - Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`) -- +- Fixed regression in :func:`pandas.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`). .. --------------------------------------------------------------------------- diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index b027d8139f24b..684ea9f01ff35 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -475,7 +475,7 @@ def __init__( if not len(Index(cols).intersection(df.columns)): raise KeyError("passes columns are not ALL present dataframe") - if len(Index(cols).intersection(df.columns)) != len(cols): + if len(Index(cols).intersection(df.columns)) != len(set(cols)): # Deprecated in GH#17295, enforced in 1.0.0 raise KeyError("Not all names specified in 'columns' are found") diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 14ad97c058a55..ea234e2ca831e 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1295,6 +1295,15 @@ def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path): with pytest.raises(ValueError, match="Excel does not support"): df.to_excel(path) + def test_excel_duplicate_columns_with_names(self, path): + # GH#39695 + df = DataFrame({"A": [0, 1], "B": [10, 11]}) + df.to_excel(path, columns=["A", "B", "A"], index=False) + + result = pd.read_excel(path) + expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"]) + tm.assert_frame_equal(result, expected) + class TestExcelWriterEngineTests: @pytest.mark.parametrize( From 44d2041556bccaf9acc27e227c8be67b040bea8d Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 19 Feb 2021 21:17:17 +0100 Subject: [PATCH 4/7] Add support for iloc iterator indexer --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/indexing.py | 5 +++++ pandas/tests/series/indexing/test_setitem.py | 10 ++++------ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9bddaa6a0084f..7c0667da9a87f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -335,6 +335,7 @@ Indexing - Bug in :meth:`DataFrame.loc.__setitem__` raising ValueError when expanding unique column for :class:`DataFrame` with duplicate columns (:issue:`38521`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) - Bug in :meth:`Series.loc.__setitem__` and :meth:`DataFrame.loc.__setitem__` raising ``KeyError`` for boolean Iterator indexer (:issue:`39614`) +- Bug in :meth:`Series.iloc` and :meth:`DataFrame.iloc` raising ``KeyError`` for Iterator indexer (:issue:`39614`) - Bug in :meth:`DataFrame.__setitem__` not raising ``ValueError`` when right hand side is a :class:`DataFrame` with wrong number of columns (:issue:`38604`) - Bug in :meth:`Series.__setitem__` raising ``ValueError`` when setting a :class:`Series` with a scalar indexer (:issue:`38303`) - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 697c30a92001e..b30019dc54353 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1493,6 +1493,9 @@ def _getitem_axis(self, key, axis: int): if isinstance(key, slice): return self._get_slice_axis(key, axis=axis) + if is_iterator(key): + key = list(key) + if isinstance(key, list): key = np.asarray(key) @@ -1534,6 +1537,8 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): def _get_setitem_indexer(self, key): # GH#32257 Fall through to let numpy do validation + if is_iterator(key): + return list(key) return key # ------------------------------------------------------------------- diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 5f09f64a61bd1..99d4aa661709c 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -360,9 +360,8 @@ def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace): indkey = np.array(ilkey) self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) - if indexer_sli is not tm.iloc: - genkey = (x for x in [key]) - self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) + genkey = (x for x in [key]) + self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): if not isinstance(key, slice): @@ -378,9 +377,8 @@ def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): indkey = np.array(ilkey) self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) - if indexer_sli is not tm.iloc: - genkey = (x for x in indkey) - self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) + genkey = (x for x in indkey) + self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) def test_mask_key(self, obj, key, expected, val, indexer_sli): # setitem with boolean mask From 989d181ec7e881a14a46bcd635736d2d997c0a37 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 19 Feb 2021 21:18:00 +0100 Subject: [PATCH 5/7] Revert "Regression in to_excel when setting duplicate column names" This reverts commit df248cff --- doc/source/whatsnew/v1.2.2.rst | 2 +- pandas/io/formats/excel.py | 2 +- pandas/tests/io/excel/test_writers.py | 9 --------- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index 31f63416428ea..974f84d3b244a 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -22,7 +22,7 @@ Fixed regressions - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`) - Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`) -- Fixed regression in :func:`pandas.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`). +- .. --------------------------------------------------------------------------- diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 684ea9f01ff35..b027d8139f24b 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -475,7 +475,7 @@ def __init__( if not len(Index(cols).intersection(df.columns)): raise KeyError("passes columns are not ALL present dataframe") - if len(Index(cols).intersection(df.columns)) != len(set(cols)): + if len(Index(cols).intersection(df.columns)) != len(cols): # Deprecated in GH#17295, enforced in 1.0.0 raise KeyError("Not all names specified in 'columns' are found") diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ea234e2ca831e..14ad97c058a55 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1295,15 +1295,6 @@ def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path): with pytest.raises(ValueError, match="Excel does not support"): df.to_excel(path) - def test_excel_duplicate_columns_with_names(self, path): - # GH#39695 - df = DataFrame({"A": [0, 1], "B": [10, 11]}) - df.to_excel(path, columns=["A", "B", "A"], index=False) - - result = pd.read_excel(path) - expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"]) - tm.assert_frame_equal(result, expected) - class TestExcelWriterEngineTests: @pytest.mark.parametrize( From 9924314a446a4fe9e60674f525df702d3b568369 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 29 Mar 2021 23:11:06 +0200 Subject: [PATCH 6/7] Add tests for one dimensional indexer --- pandas/tests/frame/indexing/test_getitem.py | 8 ++++++++ pandas/tests/frame/indexing/test_setitem.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 290ba67c7d05b..9ad27b31581be 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -144,6 +144,14 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): with pytest.raises(KeyError, match="not in index"): frame[idx] + def test_getitem_iloc_generator(self): + # GH#39614 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + result = df.iloc[indexer] + expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2]) + tm.assert_frame_equal(result, expected) + class TestGetitemCallable: def test_getitem_callable(self, float_frame): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 1d41426b93db6..450c838a88a80 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -610,6 +610,14 @@ def test_setitem_list_of_tuples(self, float_frame): expected = Series(tuples, index=float_frame.index, name="tuples") tm.assert_series_equal(result, expected) + def test_setitem_iloc_generator(self): + # GH#39614 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + df.iloc[indexer] = 1 + expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]}) + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture From 6ab583907d497502a38933d7c44533127666c0a0 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 2 Apr 2021 23:05:13 +0200 Subject: [PATCH 7/7] Handle iterator for all cases --- pandas/core/indexing.py | 2 ++ pandas/tests/frame/indexing/test_getitem.py | 7 +++++++ pandas/tests/frame/indexing/test_setitem.py | 7 +++++++ 3 files changed, 16 insertions(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a1c0295225fae..e4e79e21ca3c3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -706,6 +706,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): def __setitem__(self, key, value): if isinstance(key, tuple): + key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) else: key = com.apply_if_callable(key, self.obj) @@ -914,6 +915,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): def __getitem__(self, key): if type(key) is tuple: + key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) if self._is_scalar_access(key): with suppress(KeyError, IndexError, AttributeError): diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 9ad27b31581be..073e7b0357124 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -152,6 +152,13 @@ def test_getitem_iloc_generator(self): expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2]) tm.assert_frame_equal(result, expected) + def test_getitem_iloc_two_dimensional_generator(self): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + result = df.iloc[indexer, 1] + expected = Series([5, 6], name="b", index=[1, 2]) + tm.assert_series_equal(result, expected) + class TestGetitemCallable: def test_getitem_callable(self, float_frame): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 450c838a88a80..3a1b2345ee7f0 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -618,6 +618,13 @@ def test_setitem_iloc_generator(self): expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]}) tm.assert_frame_equal(df, expected) + def test_setitem_iloc_two_dimensional_generator(self): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + df.iloc[indexer, 1] = 1 + expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]}) + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture