Commit b7934ec

Improve the error message for missing required inputs (#37890)

* improve message for missing inputs error
* black reformat
* enhance the code for missing column check
* update
* Handle the case when evaluator has only "conversation" param
* improve the error message by including target generated columns
* fix the mypy error
* fix black

1 parent 00b616e commit b7934ec

File tree

3 files changed: +136 −75 lines changed

- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
- sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
- sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -10,9 +10,11 @@
 
 ### Bugs Fixed
 - Adversarial Conversation simulations would fail with `Forbidden`. Added logic to re-fetch the token in the exponential retry logic to retrieve the RAI Service response.
+- Fixed an issue where the Evaluate API did not fail due to missing inputs when the target did not return columns required by the evaluators.
 
 ### Other Changes
 - Enhance the error message to provide clearer instruction when required packages for the remote tracking feature are missing.
+- Print the per-evaluator run summary at the end of the Evaluate API call to make troubleshooting row-level failures easier.
 
 ## 1.0.0b3 (2024-10-01)
 
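The bug-fix entry above is the scenario exercised by the updated tests further down: a target that generates "response" but not "ground_truth" used to slip past validation. A minimal repro sketch of the new behavior, assuming a questions.jsonl containing only a "query" column (the file name and target function are hypothetical; evaluate and F1ScoreEvaluator are the public API names used in the tests below):

from typing import Dict

from azure.ai.evaluation import F1ScoreEvaluator, evaluate


def echo_target(query: str) -> Dict[str, str]:
    # Generates "response" only; F1ScoreEvaluator also needs "ground_truth".
    return {"response": f"Echo: {query}"}


try:
    evaluate(
        data="questions.jsonl",  # hypothetical input with only a "query" column
        evaluators={"g": F1ScoreEvaluator()},
        target=echo_target,
    )
except Exception as exc:
    # With this commit, the call fails fast with an aggregated message, e.g.:
    #   Some evaluators are missing required inputs:
    #   - g: ['ground_truth']
    #   To resolve this issue:
    #   - Ensure the data contains required inputs.
    #   - Verify that the target is generating the necessary columns for the
    #     evaluators. Currently generated columns: {'response'}
    #   - Check that the column mapping is correctly configured.
    print(exc)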
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 119 additions & 68 deletions
@@ -163,34 +163,127 @@ def _aggregate_metrics(df: pd.DataFrame, evaluators: Dict[str, Callable]) -> Dic
     return metrics
 
 
-def _validate_input_data_for_evaluator(evaluator, evaluator_name, df_data, is_target_fn=False):
+def _validate_columns_for_target(
+    df: pd.DataFrame,
+    target: Callable,
+) -> None:
+    """
+    Check that all columns needed by target function are present.
+
+    :param df: The data frame to be validated.
+    :type df: pd.DataFrame
+    :param target: The callable to be applied to data set.
+    :type target: Optional[Callable]
+    :raises EvaluationException: If the column starts with "__outputs." or if the input data contains missing fields.
+    """
+    if any(c.startswith(Prefixes.TSG_OUTPUTS) for c in df.columns):
+        msg = "The column cannot start from " f'"{Prefixes.TSG_OUTPUTS}" if target was defined.'
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.USER_ERROR,
+        )
+    # If the target function is given, it may return
+    # several columns and hence we cannot check the availability of columns
+    # without knowing target function semantics.
+    # Instead, here we will validate the columns, taken by target.
     required_inputs = [
         param.name
-        for param in inspect.signature(evaluator).parameters.values()
+        for param in inspect.signature(target).parameters.values()
         if param.default == inspect.Parameter.empty and param.name not in ["kwargs", "args", "self"]
     ]
 
-    missing_inputs = [col for col in required_inputs if col not in df_data.columns]
-    if missing_inputs and "conversation" in required_inputs:
-        non_conversation_inputs = [val for val in required_inputs if val != "conversation"]
-        if len(missing_inputs) == len(non_conversation_inputs) and [
-            input in non_conversation_inputs for input in missing_inputs
-        ]:
-            missing_inputs = []
+    missing_inputs = [col for col in required_inputs if col not in df.columns]
     if missing_inputs:
-        if not is_target_fn:
-            msg = f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}."
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVALUATE,
-                category=ErrorCategory.MISSING_FIELD,
-                blame=ErrorBlame.USER_ERROR,
-            )
-        msg = f"Missing required inputs for target : {missing_inputs}."
+        msg = f"Missing required inputs for target: {missing_inputs}."
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.MISSING_FIELD,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+
+def _validate_columns_for_evaluators(
+    df: pd.DataFrame,
+    evaluators: Dict[str, Callable],
+    target: Optional[Callable],
+    target_generated_columns: Optional[Set[str]],
+    column_mapping: Dict[str, Dict[str, str]],
+) -> None:
+    """
+    Check that all columns needed by evaluators are present.
+
+    :param df: The data frame to be validated.
+    :type df: pd.DataFrame
+    :param evaluators: The dictionary of evaluators.
+    :type evaluators: Dict[str, Callable]
+    :param target: The callable to be applied to data set.
+    :type target: Optional[Callable]
+    :param target_generated_columns: The set of columns generated by the target callable.
+    :type target_generated_columns: Optional[Set[str]]
+    :param column_mapping: Dictionary mapping evaluator name to evaluator column mapping.
+    :type column_mapping: Dict[str, Dict[str, str]]
+    :raises EvaluationException: If data is missing required inputs or if the target callable did not generate the necessary columns.
+    """
+    missing_inputs_per_evaluator = {}
+
+    for evaluator_name, evaluator in evaluators.items():
+        # Apply column mapping
+        mapping_config = column_mapping.get(evaluator_name, column_mapping.get("default", None))
+        new_df = _apply_column_mapping(df, mapping_config)
+
+        # Validate input data for evaluator
+        is_built_in = evaluator.__module__.startswith("azure.ai.evaluation")
+        if is_built_in:
+            # Note that for built-in evaluators supporting the "conversation" parameter,
+            # input parameters are now optional.
+            evaluator_params = [
+                param.name
+                for param in inspect.signature(evaluator).parameters.values()
+                if param.name not in ["kwargs", "args", "self"]
+            ]
+
+            if "conversation" in evaluator_params and "conversation" in new_df.columns:
+                # Ignore the missing fields if "conversation" presents in the input data
+                missing_inputs = []
+            else:
+                missing_inputs = [col for col in evaluator_params if col not in new_df.columns]
+
+                # If "conversation" is the only parameter and it is missing, keep it in the missing inputs
+                # Otherwise, remove it from the missing inputs
+                if "conversation" in missing_inputs:
+                    if not (evaluator_params == ["conversation"] and missing_inputs == ["conversation"]):
+                        missing_inputs.remove("conversation")
+        else:
+            evaluator_params = [
+                param.name
+                for param in inspect.signature(evaluator).parameters.values()
+                if param.default == inspect.Parameter.empty and param.name not in ["kwargs", "args", "self"]
+            ]
+
+            missing_inputs = [col for col in evaluator_params if col not in new_df.columns]
+
+        if missing_inputs:
+            missing_inputs_per_evaluator[evaluator_name] = missing_inputs
+
+    if missing_inputs_per_evaluator:
+        msg = "Some evaluators are missing required inputs:\n"
+        for evaluator_name, missing in missing_inputs_per_evaluator.items():
+            msg += f"- {evaluator_name}: {missing}\n"
+
+        # Add the additional notes
+        msg += "\nTo resolve this issue:\n"
+        msg += "- Ensure the data contains required inputs.\n"
+        if target is not None:
+            msg += "- Verify that the target is generating the necessary columns for the evaluators. "
+            msg += f"Currently generated columns: {target_generated_columns} \n"
+        msg += "- Check that the column mapping is correctly configured."
+
+        raise EvaluationException(
+            message=msg.strip(),
             target=ErrorTarget.EVALUATE,
             category=ErrorCategory.MISSING_FIELD,
             blame=ErrorBlame.USER_ERROR,
@@ -288,50 +381,6 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
     return initial_data_df
 
 
-def _validate_columns(
-    df: pd.DataFrame,
-    evaluators: Dict[str, Callable],
-    target: Optional[Callable],
-    column_mapping: Dict[str, Dict[str, str]],
-) -> None:
-    """
-    Check that all columns needed by evaluator or target function are present.
-
-    :param df: The data frame to be validated.
-    :type df: pd.DataFrame
-    :param evaluators: The dictionary of evaluators.
-    :type evaluators: Dict[str, Callable]
-    :param target: The callable to be applied to data set.
-    :type target: Optional[Callable]
-    :param column_mapping: Dictionary mapping evaluator name to evaluator column mapping
-    :type column_mapping: Dict[str, Dict[str, str]]
-    :raises EvaluationException: If column starts from "__outputs." while target is defined.
-    """
-    if target:
-        if any(c.startswith(Prefixes.TSG_OUTPUTS) for c in df.columns):
-            msg = "The column cannot start from " f'"{Prefixes.TSG_OUTPUTS}" if target was defined.'
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVALUATE,
-                category=ErrorCategory.INVALID_VALUE,
-                blame=ErrorBlame.USER_ERROR,
-            )
-        # If the target function is given, it may return
-        # several columns and hence we cannot check the availability of columns
-        # without knowing target function semantics.
-        # Instead, here we will validate the columns, taken by target.
-        _validate_input_data_for_evaluator(target, None, df, is_target_fn=True)
-    else:
-        for evaluator_name, evaluator in evaluators.items():
-            # Apply column mapping
-            mapping_config = column_mapping.get(evaluator_name, column_mapping.get("default", None))
-            new_df = _apply_column_mapping(df, mapping_config)
-
-            # Validate input data for evaluator
-            _validate_input_data_for_evaluator(evaluator, evaluator_name, new_df)
-
-
 def _apply_target_to_data(
     target: Callable,
     data: str,
@@ -604,7 +653,9 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
             for evaluator_name, evaluator_configuration in evaluator_config.items()
         }
     )
-    _validate_columns(input_data_df, evaluators, target, column_mapping)
+
+    if target is not None:
+        _validate_columns_for_target(input_data_df, target)
 
     # Target Run
     try:
@@ -639,6 +690,7 @@
     column_mapping.setdefault("default", {})
 
     # If target is set, apply 1-1 column mapping from target outputs to evaluator inputs
+    target_generated_columns: Set[str] = set()
     if data is not None and target is not None:
         input_data_df, target_generated_columns, target_run = _apply_target_to_data(
             target, data, pf_client, input_data_df, evaluation_name, _run_name=kwargs.get("_run_name")
@@ -656,9 +708,8 @@
                 if col not in mapping and run_output not in mapped_to_values:
                     column_mapping[evaluator_name][col] = run_output  # pylint: disable=unnecessary-dict-index-lookup
 
-    # After we have generated all columns we can check if we have
-    # everything we need for evaluators.
-    _validate_columns(input_data_df, evaluators, target=None, column_mapping=column_mapping)
+    # After we have generated all columns, we can check if we have everything we need for evaluators.
+    _validate_columns_for_evaluators(input_data_df, evaluators, target, target_generated_columns, column_mapping)
 
     # Apply 1-1 mapping from input data to evaluator inputs, excluding values already assigned
     # via target mapping.
sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 15 additions & 7 deletions
@@ -143,20 +143,26 @@ def test_evaluate_missing_required_inputs(self, missing_columns_jsonl_file):
         with pytest.raises(EvaluationException) as exc_info:
             evaluate(data=missing_columns_jsonl_file, evaluators={"g": F1ScoreEvaluator()})
 
-        assert "Missing required inputs for evaluator g : ['ground_truth']." in exc_info.value.args[0]
+        expected_message = "Some evaluators are missing required inputs:\n" "- g: ['ground_truth']\n"
+        assert expected_message in exc_info.value.args[0]
 
     def test_evaluate_missing_required_inputs_target(self, questions_wrong_file):
         with pytest.raises(EvaluationException) as exc_info:
             evaluate(data=questions_wrong_file, evaluators={"g": F1ScoreEvaluator()}, target=_target_fn)
-        assert "Missing required inputs for target : ['query']." in exc_info.value.args[0]
+        assert "Missing required inputs for target: ['query']." in exc_info.value.args[0]
 
-    def test_wrong_target(self, questions_file):
-        """Test error, when target function does not generate required column."""
+    def test_target_not_generate_required_columns(self, questions_file):
         with pytest.raises(EvaluationException) as exc_info:
-            # target_fn will generate the "response", but not ground truth.
+            # target_fn will generate the "response", but not "ground_truth".
            evaluate(data=questions_file, evaluators={"g": F1ScoreEvaluator()}, target=_target_fn)
 
-        assert "Missing required inputs for evaluator g : ['ground_truth']." in exc_info.value.args[0]
+        expected_message = "Some evaluators are missing required inputs:\n" "- g: ['ground_truth']\n"
+
+        expected_message2 = "Verify that the target is generating the necessary columns for the evaluators. "
+        expected_message2 += "Currently generated columns: {'response'}"
+
+        assert expected_message in exc_info.value.args[0]
+        assert expected_message2 in exc_info.value.args[0]
 
     def test_target_raises_on_outputs(self):
         """Test we are raising exception if the output is column is present in the input."""
@@ -553,7 +559,9 @@ def test_optional_inputs_with_data(self, questions_file, questions_answers_basic
                 },
                 _use_pf_client=use_pf_client,
             )  # type: ignore
-            assert exc_info._excinfo[1].__str__() == "Missing required inputs for evaluator non : ['response']."  # type: ignore
+
+            expected_message = "Some evaluators are missing required inputs:\n" "- non: ['response']\n"
+            assert expected_message in exc_info.value.args[0]
 
         # Variants with default answer work when only question is inputted
         only_question_results = evaluate(