From 63fbce42ab39d4e6ca4c8d25020e7218557ee3ab Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Sat, 20 Jun 2020 19:21:58 +0100 Subject: [PATCH 01/10] ENH: Include missing labels in error message for loc GH34272 --- pandas/core/indexing.py | 6 ++++-- pandas/tests/indexing/test_indexing.py | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9c8b01003bece..184a507914808 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1283,7 +1283,8 @@ def _validate_read_indexer( return # Count missing values: - missing = (indexer < 0).sum() + missing_mask = indexer < 0 + missing = (missing_mask).sum() if missing: if missing == len(indexer): @@ -1302,9 +1303,10 @@ def _validate_read_indexer( # code, so we want to avoid warning & then # just raising if not ax.is_categorical(): + not_found = list(key[missing_mask]) raise KeyError( "Passing list-likes to .loc or [] with any missing labels " - "is no longer supported, see " + f"is no longer supported. The following labels were missing: {not_found}. See " "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 ) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 5c0230e75021c..750eacd385df6 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1075,3 +1075,12 @@ def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(): result = ser expected = pd.Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") tm.assert_series_equal(result, expected) + + +def test_missing_labels_inside_loc(): + # GH34272 + s = pd.Series({"a": 1, "b": 2, "c": 3}) + with pytest.raises(KeyError) as e: + s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] + missing_labels = ["missing_0", "missing_1", "missing_2"] + assert all(missing_label in str(e.value) for missing_label in missing_labels) From 140794bf7f8ea4a3febef5c8b3a5b9b366a6b5a0 Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Sat, 20 Jun 2020 19:38:22 +0100 Subject: [PATCH 02/10] ENH: Include missing labels in error message for loc GH34272 Added what's new documentation. --- doc/source/whatsnew/v1.1.0.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index f6ad3a800283d..35a979220dcc6 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -13,6 +13,19 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ +.. _whatsnew_110.specify_missing_labels: + +KeyErrors raised by loc specify missing labels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, if labels were missing for a loc call, a Key Error was raised stating that this was no longer supported. + +Now the error message also includes a list of the missing labels. For example, + +.. ipython:: python + + s = pd.Series({"a": 1, "b": 2, "c": 3}) + s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] + .. _whatsnew_110.astype_string: All dtypes can now be converted to ``StringDtype`` From ac23b0230158c3a30970177082f05c52d33a7c6c Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Sat, 20 Jun 2020 19:48:21 +0100 Subject: [PATCH 03/10] ENH: Include missing labels in error message for loc GH34272 Flake 8 line length linting. --- pandas/core/indexing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 184a507914808..b79b92b52921f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1306,8 +1306,9 @@ def _validate_read_indexer( not_found = list(key[missing_mask]) raise KeyError( "Passing list-likes to .loc or [] with any missing labels " - f"is no longer supported. The following labels were missing: {not_found}. See " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 + "is no longer supported. " + f"The following labels were missing: {not_found}. " + "See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 ) From 6b1ad07adf9261d227a231a426d58d4cf3fb6bda Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Sat, 20 Jun 2020 20:42:09 +0100 Subject: [PATCH 04/10] ENH: Include missing labels in error message for loc GH34272 Fix ipython directive to code block so that error message can be included. --- doc/source/whatsnew/v1.1.0.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 35a979220dcc6..91a87bcb7f504 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -21,11 +21,12 @@ Previously, if labels were missing for a loc call, a Key Error was raised statin Now the error message also includes a list of the missing labels. For example, -.. ipython:: python +.. code-block:: ipython s = pd.Series({"a": 1, "b": 2, "c": 3}) s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] - + ... + KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: ['missing_0', 'missing_1', 'missing_2']. See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" .. _whatsnew_110.astype_string: All dtypes can now be converted to ``StringDtype`` From 871059664c73550e85b01d566b4aef2808de43f7 Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Sat, 20 Jun 2020 21:28:23 +0100 Subject: [PATCH 05/10] ENH: Include missing labels in error message for loc GH34272 Fixed rst flake8 issues. --- doc/source/whatsnew/v1.1.0.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 91a87bcb7f504..c143fc5f4c0fa 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -20,13 +20,15 @@ KeyErrors raised by loc specify missing labels Previously, if labels were missing for a loc call, a Key Error was raised stating that this was no longer supported. Now the error message also includes a list of the missing labels. For example, - .. code-block:: ipython - s = pd.Series({"a": 1, "b": 2, "c": 3}) - s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] - ... - KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: ['missing_0', 'missing_1', 'missing_2']. See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" + In [1]: s = pd.Series({"a": 1, "b": 2, "c": 3}) + In [2]: s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] + Passing list-likes to .loc or [] with any missing labels is no longer supported. + The following labels were missing: ['missing_0', 'missing_1', 'missing_2']. See + https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike + + .. _whatsnew_110.astype_string: All dtypes can now be converted to ``StringDtype`` From b77dcfcb176c258168dfc9f49a37b7b5cccbaa32 Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Sun, 21 Jun 2020 12:14:31 +0100 Subject: [PATCH 06/10] ENH: Include missing labels in error message for loc GH34272 PR feedback: what's new code block removed; additional test for many or long missing labels, context manager for display options. --- doc/source/whatsnew/v1.1.0.rst | 9 +-------- pandas/core/indexing.py | 17 +++++++++------- pandas/tests/indexing/test_indexing.py | 28 ++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c143fc5f4c0fa..1734df47f00be 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -19,14 +19,7 @@ KeyErrors raised by loc specify missing labels ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Previously, if labels were missing for a loc call, a Key Error was raised stating that this was no longer supported. -Now the error message also includes a list of the missing labels. For example, -.. code-block:: ipython - - In [1]: s = pd.Series({"a": 1, "b": 2, "c": 3}) - In [2]: s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] - Passing list-likes to .loc or [] with any missing labels is no longer supported. - The following labels were missing: ['missing_0', 'missing_1', 'missing_2']. See - https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike +Now the error message also includes a list of the missing labels (max 10 items, display width 80 characters). .. _whatsnew_110.astype_string: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b79b92b52921f..6b7f3a46f1808 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1303,13 +1303,16 @@ def _validate_read_indexer( # code, so we want to avoid warning & then # just raising if not ax.is_categorical(): - not_found = list(key[missing_mask]) - raise KeyError( - "Passing list-likes to .loc or [] with any missing labels " - "is no longer supported. " - f"The following labels were missing: {not_found}. " - "See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 - ) + not_found = key[missing_mask] + from pandas import option_context + + with option_context("display.max_seq_items", 10, "display.width", 80): + raise KeyError( + "Passing list-likes to .loc or [] with any missing labels " + "is no longer supported. " + f"The following labels were missing: {not_found}. " + "See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 + ) @doc(IndexingMixin.iloc) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 750eacd385df6..b77c47f927517 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1077,10 +1077,30 @@ def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(): tm.assert_series_equal(result, expected) -def test_missing_labels_inside_loc(): +def test_missing_labels_inside_loc_matched_in_error_message(): # GH34272 s = pd.Series({"a": 1, "b": 2, "c": 3}) - with pytest.raises(KeyError) as e: + error_message_regex = "missing_0.*missing_1.*missing_2" + with pytest.raises(KeyError, match=error_message_regex): s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] - missing_labels = ["missing_0", "missing_1", "missing_2"] - assert all(missing_label in str(e.value) for missing_label in missing_labels) + + +def test_many_missing_labels_inside_loc_error_message_limited(): + # GH34272 + n = 10000 + missing_labels = [f"missing_{label}" for label in range(n)] + s = pd.Series({"a": 1, "b": 2, "c": 3}) + # regex checks labels between 4 and 9995 are replaced with ellipses + error_message_regex = "missing_4.*\\.\\.\\..*missing_9995" + with pytest.raises(KeyError, match=error_message_regex): + s.loc[["a", "c"] + missing_labels] + + +def test_long_text_missing_labels_inside_loc_error_message_limited(): + # GH34272 + s = pd.Series({"a": 1, "b": 2, "c": 3}) + missing_labels = [f"long_missing_label_text_{i}" * 5 for i in range(3)] + # regex checks for very long labels there are new lines between each + error_message_regex = "long_missing_label_text_0.*\\\\n.*long_missing_label_text_1" + with pytest.raises(KeyError, match=error_message_regex): + s.loc[["a", "c"] + missing_labels] From c460a03e8ee8c9a3041f0f5f33761e1360f7dcbe Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Thu, 25 Jun 2020 20:43:24 +0100 Subject: [PATCH 07/10] ENH: Include missing labels in error message for loc GH34272 PR feedback: reference issue in what's new and import position. --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/indexing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 1734df47f00be..cdbd4ac0f7821 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -19,7 +19,7 @@ KeyErrors raised by loc specify missing labels ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Previously, if labels were missing for a loc call, a Key Error was raised stating that this was no longer supported. -Now the error message also includes a list of the missing labels (max 10 items, display width 80 characters). +Now the error message also includes a list of the missing labels (max 10 items, display width 80 characters). See GH34272. .. _whatsnew_110.astype_string: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6b7f3a46f1808..32219c6d82940 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2,6 +2,7 @@ import numpy as np +from pandas._config.config import option_context from pandas._libs.indexing import _NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim from pandas.errors import AbstractMethodError, InvalidIndexError @@ -1304,7 +1305,6 @@ def _validate_read_indexer( # just raising if not ax.is_categorical(): not_found = key[missing_mask] - from pandas import option_context with option_context("display.max_seq_items", 10, "display.width", 80): raise KeyError( From ecaf34596819f7acc35bdc32edbd03341ca8ccf2 Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Thu, 25 Jun 2020 21:11:46 +0100 Subject: [PATCH 08/10] ENH: Include missing labels in error message for loc GH34272 PR feedback: import sorting (isort) fix --- pandas/core/indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 32219c6d82940..3cf20b68c84f4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -3,6 +3,7 @@ import numpy as np from pandas._config.config import option_context + from pandas._libs.indexing import _NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim from pandas.errors import AbstractMethodError, InvalidIndexError From 744c82823d38d25180415fa774f8a038028d4127 Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Thu, 25 Jun 2020 23:22:24 +0100 Subject: [PATCH 09/10] ENH: Include missing labels in error message for loc GH34272 PR feedback: add issue tag --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index cdbd4ac0f7821..d68c8588e2704 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -19,7 +19,7 @@ KeyErrors raised by loc specify missing labels ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Previously, if labels were missing for a loc call, a Key Error was raised stating that this was no longer supported. -Now the error message also includes a list of the missing labels (max 10 items, display width 80 characters). See GH34272. +Now the error message also includes a list of the missing labels (max 10 items, display width 80 characters). See :issue:`34272`. .. _whatsnew_110.astype_string: From 18d2b8347fd8256e053069ff4cc0a3fd5f63145f Mon Sep 17 00:00:00 2001 From: tim <43515959+timhunderwood@users.noreply.github.com> Date: Fri, 26 Jun 2020 19:53:31 +0100 Subject: [PATCH 10/10] ENH: Include missing labels in error message for loc GH34272 PR feedback: minor edit of what is new message. --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d68c8588e2704..9c78c8b2cbebc 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -17,7 +17,7 @@ Enhancements KeyErrors raised by loc specify missing labels ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, if labels were missing for a loc call, a Key Error was raised stating that this was no longer supported. +Previously, if labels were missing for a loc call, a KeyError was raised stating that this was no longer supported. Now the error message also includes a list of the missing labels (max 10 items, display width 80 characters). See :issue:`34272`.