From 5f0ca8adc2404ff92ae17333e769e0fdd58ed62a Mon Sep 17 00:00:00 2001 From: Jered Dominguez-Trujillo Date: Thu, 19 Mar 2020 20:02:11 +0000 Subject: [PATCH 1/2] Issue 32755: More descriptive SpecificationError message that reports to user non-existing columns causing error Line too long - corrected What's New Updated Tests Fixed testing errors --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/base.py | 6 +++++- pandas/tests/groupby/aggregate/test_other.py | 4 +++- pandas/tests/resample/test_resample_api.py | 6 ++++-- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 720ce7af47a18..2432c1ef368fe 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -84,7 +84,7 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- +- :meth:`DataFrame.agg` now provides more descriptive SpecificationError message when attempting to aggregating non-existant column (:issue:`32755`) Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/base.py b/pandas/core/base.py index e1c6bef66239d..840bbea3cce69 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -356,7 +356,11 @@ def _aggregate(self, arg, *args, **kwargs): if isinstance(obj, ABCDataFrame) and len( obj.columns.intersection(keys) ) != len(keys): - raise SpecificationError("nested renamer is not supported") + cols = sorted(set(keys) - set(obj.columns.intersection(keys))) + if len(cols) > 1: + raise SpecificationError(f"Columns {cols} do not exist") + else: + raise SpecificationError(f"Column {cols} does not exist") from pandas.core.reshape.concat import concat diff --git 
a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 52ee3e652501c..554a466179e45 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -209,7 +209,7 @@ def test_aggregate_api_consistency(): expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1) expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]]) - msg = r"nested renamer is not supported" + msg = r"Columns \['r', 'r2'\] do not exist" with pytest.raises(SpecificationError, match=msg): grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean}) @@ -224,9 +224,11 @@ def test_agg_dict_renaming_deprecation(): {"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}} ) + msg = r"Column \['ma'\] does not exist" with pytest.raises(SpecificationError, match=msg): df.groupby("A")[["B", "C"]].agg({"ma": "max"}) + msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): df.groupby("A").B.agg({"foo": "count"}) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 6389c88c99f73..5a4e5f532b317 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -287,7 +287,7 @@ def test_agg_consistency(): r = df.resample("3T") - msg = "nested renamer is not supported" + msg = r"Columns \['r1', 'r2'\] do not exist" with pytest.raises(pd.core.base.SpecificationError, match=msg): r.agg({"r1": "mean", "r2": "sum"}) @@ -419,7 +419,7 @@ def test_agg_misc(): [("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")] ) - msg = "nested renamer is not supported" + msg = r"Columns \['result1', 'result2'\] do not exist" for t in cases: with pytest.raises(pd.core.base.SpecificationError, match=msg): t[["A", "B"]].agg(OrderedDict([("result1", np.sum), ("result2", np.mean)])) @@ -440,6 +440,8 @@ def test_agg_misc(): result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": 
["mean", "std"]}) tm.assert_frame_equal(result, expected, check_like=True) + msg = "nested renamer is not supported" + # series like aggs for t in cases: with pytest.raises(pd.core.base.SpecificationError, match=msg): From 984e932fc44cf1f0929cf4be851ad49f4e158bbd Mon Sep 17 00:00:00 2001 From: Jered Dominguez-Trujillo Date: Sat, 21 Mar 2020 21:46:13 +0000 Subject: [PATCH 2/2] Fixed comments --- doc/source/whatsnew/v1.1.0.rst | 4 ++-- pandas/core/base.py | 5 +---- pandas/tests/groupby/aggregate/test_other.py | 4 ++-- pandas/tests/resample/test_resample_api.py | 4 ++-- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2432c1ef368fe..5242426a07c7a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -84,7 +84,7 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- :meth:`DataFrame.agg` now provides more descriptive SpecificationError message when attempting to aggregating non-existant column (:issue:`32755`) +- Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -380,7 +380,7 @@ Reshaping - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) - Bug in :meth:`DataFrame.apply` where callback was called with :class:`Series` parameter even though ``raw=True`` requested. 
(:issue:`32423`) - Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`) - +- :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregate a non-existent column (:issue:`32755`) Sparse ^^^^^^ diff --git a/pandas/core/base.py b/pandas/core/base.py index 840bbea3cce69..bb7edf669cab1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -357,10 +357,7 @@ def _aggregate(self, arg, *args, **kwargs): obj.columns.intersection(keys) ) != len(keys): cols = sorted(set(keys) - set(obj.columns.intersection(keys))) - if len(cols) > 1: - raise SpecificationError(f"Columns {cols} do not exist") - else: - raise SpecificationError(f"Column {cols} does not exist") + raise SpecificationError(f"Column(s) {cols} do not exist") from pandas.core.reshape.concat import concat diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 554a466179e45..264cf40dc6984 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -209,7 +209,7 @@ def test_aggregate_api_consistency(): expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1) expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]]) - msg = r"Columns \['r', 'r2'\] do not exist" + msg = r"Column\(s\) \['r', 'r2'\] do not exist" with pytest.raises(SpecificationError, match=msg): grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean}) @@ -224,7 +224,7 @@ def test_agg_dict_renaming_deprecation(): {"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}} ) - msg = r"Column \['ma'\] does not exist" + msg = r"Column\(s\) \['ma'\] do not exist" with pytest.raises(SpecificationError, match=msg): df.groupby("A")[["B", "C"]].agg({"ma": "max"}) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index
5a4e5f532b317..5044a18e33248 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -287,7 +287,7 @@ def test_agg_consistency(): r = df.resample("3T") - msg = r"Columns \['r1', 'r2'\] do not exist" + msg = r"Column\(s\) \['r1', 'r2'\] do not exist" with pytest.raises(pd.core.base.SpecificationError, match=msg): r.agg({"r1": "mean", "r2": "sum"}) @@ -419,7 +419,7 @@ def test_agg_misc(): [("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")] ) - msg = r"Columns \['result1', 'result2'\] do not exist" + msg = r"Column\(s\) \['result1', 'result2'\] do not exist" for t in cases: with pytest.raises(pd.core.base.SpecificationError, match=msg): t[["A", "B"]].agg(OrderedDict([("result1", np.sum), ("result2", np.mean)]))