Commit b7934ec

Improve the error message for missing required inputs (#37890)

* improve message for missing inputs error
* black reformat
* enhance the code for missing column check
* update
* Handle the case when evaluator has only "conversation" param
* improve the error message by including target generated columns
* fix the mypy error
* fix black

1 parent 00b616e commit b7934ec

File tree

3 files changed: +136 −75 lines changed

- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
- sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
- sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -10,9 +10,11 @@
 
 ### Bugs Fixed
 - Adversarial Conversation simulations would fail with `Forbidden`. Added logic to re-fetch the token in the exponential retry logic to retrieve the RAI Service response.
+- Fixed an issue where the Evaluate API did not fail due to missing inputs when the target did not return columns required by the evaluators.
 
 ### Other Changes
 - Enhance the error message to provide clearer instruction when required packages for the remote tracking feature are missing.
+- Print the per-evaluator run summary at the end of the Evaluate API call to make troubleshooting row-level failures easier.
 
 ## 1.0.0b3 (2024-10-01)
 
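The bug-fix entry above is the scenario exercised by the updated tests further down: a target that generates "response" but not "ground_truth" used to slip past validation. A minimal repro sketch of the new behavior, assuming a questions.jsonl containing only a "query" column (the file name and target function are hypothetical; evaluate and F1ScoreEvaluator are the public API names used in the tests below):

from typing import Dict

from azure.ai.evaluation import F1ScoreEvaluator, evaluate


def echo_target(query: str) -> Dict[str, str]:
    # Generates "response" only; F1ScoreEvaluator also needs "ground_truth".
    return {"response": f"Echo: {query}"}


try:
    evaluate(
        data="questions.jsonl",  # hypothetical input with only a "query" column
        evaluators={"g": F1ScoreEvaluator()},
        target=echo_target,
    )
except Exception as exc:
    # With this commit, the call fails fast with an aggregated message, e.g.:
    #   Some evaluators are missing required inputs:
    #   - g: ['ground_truth']
    #   To resolve this issue:
    #   - Ensure the data contains required inputs.
    #   - Verify that the target is generating the necessary columns for the
    #     evaluators. Currently generated columns: {'response'}
    #   - Check that the column mapping is correctly configured.
    print(exc)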
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 119 additions & 68 deletions
@@ -163,34 +163,127 @@ def _aggregate_metrics(df: pd.DataFrame, evaluators: Dict[str, Callable]) -> Dic
     return metrics
 
 
-def _validate_input_data_for_evaluator(evaluator, evaluator_name, df_data, is_target_fn=False):
+def _validate_columns_for_target(
+    df: pd.DataFrame,
+    target: Callable,
+) -> None:
+    """
+    Check that all columns needed by target function are present.
+
+    :param df: The data frame to be validated.
+    :type df: pd.DataFrame
+    :param target: The callable to be applied to data set.
+    :type target: Optional[Callable]
+    :raises EvaluationException: If the column starts with "__outputs." or if the input data contains missing fields.
+    """
+    if any(c.startswith(Prefixes.TSG_OUTPUTS) for c in df.columns):
+        msg = "The column cannot start from " f'"{Prefixes.TSG_OUTPUTS}" if target was defined.'
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.USER_ERROR,
+        )
+    # If the target function is given, it may return
+    # several columns and hence we cannot check the availability of columns
+    # without knowing target function semantics.
+    # Instead, here we will validate the columns, taken by target.
     required_inputs = [
         param.name
-        for param in inspect.signature(evaluator).parameters.values()
+        for param in inspect.signature(target).parameters.values()
         if param.default == inspect.Parameter.empty and param.name not in ["kwargs", "args", "self"]
     ]
 
-    missing_inputs = [col for col in required_inputs if col not in df_data.columns]
-    if missing_inputs and "conversation" in required_inputs:
-        non_conversation_inputs = [val for val in required_inputs if val != "conversation"]
-        if len(missing_inputs) == len(non_conversation_inputs) and [
-            input in non_conversation_inputs for input in missing_inputs
-        ]:
-            missing_inputs = []
+    missing_inputs = [col for col in required_inputs if col not in df.columns]
     if missing_inputs:
-        if not is_target_fn:
-            msg = f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}."
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVALUATE,
-                category=ErrorCategory.MISSING_FIELD,
-                blame=ErrorBlame.USER_ERROR,
-            )
-        msg = f"Missing required inputs for target : {missing_inputs}."
+        msg = f"Missing required inputs for target: {missing_inputs}."
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.MISSING_FIELD,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+
+def _validate_columns_for_evaluators(
+    df: pd.DataFrame,
+    evaluators: Dict[str, Callable],
+    target: Optional[Callable],
+    target_generated_columns: Optional[Set[str]],
+    column_mapping: Dict[str, Dict[str, str]],
+) -> None:
+    """
+    Check that all columns needed by evaluators are present.
+
+    :param df: The data frame to be validated.
+    :type df: pd.DataFrame
+    :param evaluators: The dictionary of evaluators.
+    :type evaluators: Dict[str, Callable]
+    :param target: The callable to be applied to data set.
+    :type target: Optional[Callable]
+    :param target_generated_columns: The set of columns generated by the target callable.
+    :type target_generated_columns: Optional[Set[str]]
+    :param column_mapping: Dictionary mapping evaluator name to evaluator column mapping.
+    :type column_mapping: Dict[str, Dict[str, str]]
+    :raises EvaluationException: If data is missing required inputs or if the target callable did not generate the necessary columns.
+    """
+    missing_inputs_per_evaluator = {}
+
+    for evaluator_name, evaluator in evaluators.items():
+        # Apply column mapping
+        mapping_config = column_mapping.get(evaluator_name, column_mapping.get("default", None))
+        new_df = _apply_column_mapping(df, mapping_config)
+
+        # Validate input data for evaluator
+        is_built_in = evaluator.__module__.startswith("azure.ai.evaluation")
+        if is_built_in:
+            # Note that for built-in evaluators supporting the "conversation" parameter,
+            # input parameters are now optional.
+            evaluator_params = [
+                param.name
+                for param in inspect.signature(evaluator).parameters.values()
+                if param.name not in ["kwargs", "args", "self"]
+            ]
+
+            if "conversation" in evaluator_params and "conversation" in new_df.columns:
+                # Ignore the missing fields if "conversation" presents in the input data
+                missing_inputs = []
+            else:
+                missing_inputs = [col for col in evaluator_params if col not in new_df.columns]
+
+                # If "conversation" is the only parameter and it is missing, keep it in the missing inputs
+                # Otherwise, remove it from the missing inputs
+                if "conversation" in missing_inputs:
+                    if not (evaluator_params == ["conversation"] and missing_inputs == ["conversation"]):
+                        missing_inputs.remove("conversation")
+        else:
+            evaluator_params = [
+                param.name
+                for param in inspect.signature(evaluator).parameters.values()
+                if param.default == inspect.Parameter.empty and param.name not in ["kwargs", "args", "self"]
+            ]
+
+            missing_inputs = [col for col in evaluator_params if col not in new_df.columns]
+
+        if missing_inputs:
+            missing_inputs_per_evaluator[evaluator_name] = missing_inputs
+
+    if missing_inputs_per_evaluator:
+        msg = "Some evaluators are missing required inputs:\n"
+        for evaluator_name, missing in missing_inputs_per_evaluator.items():
+            msg += f"- {evaluator_name}: {missing}\n"
+
+        # Add the additional notes
+        msg += "\nTo resolve this issue:\n"
+        msg += "- Ensure the data contains required inputs.\n"
+        if target is not None:
+            msg += "- Verify that the target is generating the necessary columns for the evaluators. "
+            msg += f"Currently generated columns: {target_generated_columns} \n"
+        msg += "- Check that the column mapping is correctly configured."
+
+        raise EvaluationException(
+            message=msg.strip(),
             target=ErrorTarget.EVALUATE,
             category=ErrorCategory.MISSING_FIELD,
             blame=ErrorBlame.USER_ERROR,
@@ -288,50 +381,6 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
     return initial_data_df
 
 
-def _validate_columns(
-    df: pd.DataFrame,
-    evaluators: Dict[str, Callable],
-    target: Optional[Callable],
-    column_mapping: Dict[str, Dict[str, str]],
-) -> None:
-    """
-    Check that all columns needed by evaluator or target function are present.
-
-    :param df: The data frame to be validated.
-    :type df: pd.DataFrame
-    :param evaluators: The dictionary of evaluators.
-    :type evaluators: Dict[str, Callable]
-    :param target: The callable to be applied to data set.
-    :type target: Optional[Callable]
-    :param column_mapping: Dictionary mapping evaluator name to evaluator column mapping
-    :type column_mapping: Dict[str, Dict[str, str]]
-    :raises EvaluationException: If column starts from "__outputs." while target is defined.
-    """
-    if target:
-        if any(c.startswith(Prefixes.TSG_OUTPUTS) for c in df.columns):
-            msg = "The column cannot start from " f'"{Prefixes.TSG_OUTPUTS}" if target was defined.'
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVALUATE,
-                category=ErrorCategory.INVALID_VALUE,
-                blame=ErrorBlame.USER_ERROR,
-            )
-        # If the target function is given, it may return
-        # several columns and hence we cannot check the availability of columns
-        # without knowing target function semantics.
-        # Instead, here we will validate the columns, taken by target.
-        _validate_input_data_for_evaluator(target, None, df, is_target_fn=True)
-    else:
-        for evaluator_name, evaluator in evaluators.items():
-            # Apply column mapping
-            mapping_config = column_mapping.get(evaluator_name, column_mapping.get("default", None))
-            new_df = _apply_column_mapping(df, mapping_config)
-
-            # Validate input data for evaluator
-            _validate_input_data_for_evaluator(evaluator, evaluator_name, new_df)
-
-
 def _apply_target_to_data(
     target: Callable,
     data: str,
@@ -604,7 +653,9 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
             for evaluator_name, evaluator_configuration in evaluator_config.items()
         }
     )
-    _validate_columns(input_data_df, evaluators, target, column_mapping)
+
+    if target is not None:
+        _validate_columns_for_target(input_data_df, target)
 
     # Target Run
     try:
@@ -639,6 +690,7 @@
     column_mapping.setdefault("default", {})
 
     # If target is set, apply 1-1 column mapping from target outputs to evaluator inputs
+    target_generated_columns: Set[str] = set()
     if data is not None and target is not None:
         input_data_df, target_generated_columns, target_run = _apply_target_to_data(
             target, data, pf_client, input_data_df, evaluation_name, _run_name=kwargs.get("_run_name")
@@ -656,9 +708,8 @@
                 if col not in mapping and run_output not in mapped_to_values:
                     column_mapping[evaluator_name][col] = run_output  # pylint: disable=unnecessary-dict-index-lookup
 
-    # After we have generated all columns we can check if we have
-    # everything we need for evaluators.
-    _validate_columns(input_data_df, evaluators, target=None, column_mapping=column_mapping)
+    # After we have generated all columns, we can check if we have everything we need for evaluators.
+    _validate_columns_for_evaluators(input_data_df, evaluators, target, target_generated_columns, column_mapping)
 
     # Apply 1-1 mapping from input data to evaluator inputs, excluding values already assigned
     # via target mapping.
sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 15 additions & 7 deletions
@@ -143,20 +143,26 @@ def test_evaluate_missing_required_inputs(self, missing_columns_jsonl_file):
         with pytest.raises(EvaluationException) as exc_info:
             evaluate(data=missing_columns_jsonl_file, evaluators={"g": F1ScoreEvaluator()})
 
-        assert "Missing required inputs for evaluator g : ['ground_truth']." in exc_info.value.args[0]
+        expected_message = "Some evaluators are missing required inputs:\n" "- g: ['ground_truth']\n"
+        assert expected_message in exc_info.value.args[0]
 
     def test_evaluate_missing_required_inputs_target(self, questions_wrong_file):
         with pytest.raises(EvaluationException) as exc_info:
             evaluate(data=questions_wrong_file, evaluators={"g": F1ScoreEvaluator()}, target=_target_fn)
-        assert "Missing required inputs for target : ['query']." in exc_info.value.args[0]
+        assert "Missing required inputs for target: ['query']." in exc_info.value.args[0]
 
-    def test_wrong_target(self, questions_file):
-        """Test error, when target function does not generate required column."""
+    def test_target_not_generate_required_columns(self, questions_file):
         with pytest.raises(EvaluationException) as exc_info:
-            # target_fn will generate the "response", but not ground truth.
+            # target_fn will generate the "response", but not "ground_truth".
            evaluate(data=questions_file, evaluators={"g": F1ScoreEvaluator()}, target=_target_fn)
 
-        assert "Missing required inputs for evaluator g : ['ground_truth']." in exc_info.value.args[0]
+        expected_message = "Some evaluators are missing required inputs:\n" "- g: ['ground_truth']\n"
+
+        expected_message2 = "Verify that the target is generating the necessary columns for the evaluators. "
+        expected_message2 += "Currently generated columns: {'response'}"
+
+        assert expected_message in exc_info.value.args[0]
+        assert expected_message2 in exc_info.value.args[0]
 
     def test_target_raises_on_outputs(self):
         """Test we are raising exception if the output is column is present in the input."""
@@ -553,7 +559,9 @@ def test_optional_inputs_with_data(self, questions_file, questions_answers_basic
                 },
                 _use_pf_client=use_pf_client,
             )  # type: ignore
-            assert exc_info._excinfo[1].__str__() == "Missing required inputs for evaluator non : ['response']."  # type: ignore
+
+            expected_message = "Some evaluators are missing required inputs:\n" "- non: ['response']\n"
+            assert expected_message in exc_info.value.args[0]
 
         # Variants with default answer work when only question is inputted
         only_question_results = evaluate(