@@ -163,34 +163,127 @@ def _aggregate_metrics(df: pd.DataFrame, evaluators: Dict[str, Callable]) -> Dict[str, float]:
     return metrics


-def _validate_input_data_for_evaluator(evaluator, evaluator_name, df_data, is_target_fn=False):
+def _validate_columns_for_target(
+    df: pd.DataFrame,
+    target: Callable,
+) -> None:
+    """
+    Check that all columns needed by target function are present.
+
+    :param df: The data frame to be validated.
+    :type df: pd.DataFrame
+    :param target: The callable to be applied to data set.
+    :type target: Optional[Callable]
+    :raises EvaluationException: If the column starts with "__outputs." or if the input data contains missing fields.
+    """
+    if any(c.startswith(Prefixes.TSG_OUTPUTS) for c in df.columns):
+        msg = "The column cannot start from " f'"{Prefixes.TSG_OUTPUTS}" if target was defined.'
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.USER_ERROR,
+        )
+    # If the target function is given, it may return
+    # several columns and hence we cannot check the availability of columns
+    # without knowing target function semantics.
+    # Instead, here we will validate the columns, taken by target.
     required_inputs = [
         param.name
-        for param in inspect.signature(evaluator).parameters.values()
+        for param in inspect.signature(target).parameters.values()
         if param.default == inspect.Parameter.empty and param.name not in ["kwargs", "args", "self"]
     ]

-    missing_inputs = [col for col in required_inputs if col not in df_data.columns]
-    if missing_inputs and "conversation" in required_inputs:
-        non_conversation_inputs = [val for val in required_inputs if val != "conversation"]
-        if len(missing_inputs) == len(non_conversation_inputs) and [
-            input in non_conversation_inputs for input in missing_inputs
-        ]:
-            missing_inputs = []
+    missing_inputs = [col for col in required_inputs if col not in df.columns]
     if missing_inputs:
-        if not is_target_fn:
-            msg = f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}."
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVALUATE,
-                category=ErrorCategory.MISSING_FIELD,
-                blame=ErrorBlame.USER_ERROR,
-            )
-        msg = f"Missing required inputs for target : {missing_inputs}."
+        msg = f"Missing required inputs for target: {missing_inputs}."
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.MISSING_FIELD,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+
+def _validate_columns_for_evaluators(
+    df: pd.DataFrame,
+    evaluators: Dict[str, Callable],
+    target: Optional[Callable],
+    target_generated_columns: Optional[Set[str]],
+    column_mapping: Dict[str, Dict[str, str]],
+) -> None:
+    """
+    Check that all columns needed by evaluators are present.
+
+    :param df: The data frame to be validated.
+    :type df: pd.DataFrame
+    :param evaluators: The dictionary of evaluators.
+    :type evaluators: Dict[str, Callable]
+    :param target: The callable to be applied to data set.
+    :type target: Optional[Callable]
+    :param target_generated_columns: The set of columns generated by the target callable.
+    :type target_generated_columns: Optional[Set[str]]
+    :param column_mapping: Dictionary mapping evaluator name to evaluator column mapping.
+    :type column_mapping: Dict[str, Dict[str, str]]
+    :raises EvaluationException: If data is missing required inputs or if the target callable did not generate the necessary columns.
+    """
+    missing_inputs_per_evaluator = {}
+
+    for evaluator_name, evaluator in evaluators.items():
+        # Apply column mapping
+        mapping_config = column_mapping.get(evaluator_name, column_mapping.get("default", None))
+        new_df = _apply_column_mapping(df, mapping_config)
+
+        # Validate input data for evaluator
+        is_built_in = evaluator.__module__.startswith("azure.ai.evaluation")
+        if is_built_in:
+            # Note that for built-in evaluators supporting the "conversation" parameter,
+            # input parameters are now optional.
+            evaluator_params = [
+                param.name
+                for param in inspect.signature(evaluator).parameters.values()
+                if param.name not in ["kwargs", "args", "self"]
+            ]
+
+            if "conversation" in evaluator_params and "conversation" in new_df.columns:
+                # Ignore the missing fields if "conversation" presents in the input data
+                missing_inputs = []
+            else:
+                missing_inputs = [col for col in evaluator_params if col not in new_df.columns]
+
+                # If "conversation" is the only parameter and it is missing, keep it in the missing inputs
+                # Otherwise, remove it from the missing inputs
+                if "conversation" in missing_inputs:
+                    if not (evaluator_params == ["conversation"] and missing_inputs == ["conversation"]):
+                        missing_inputs.remove("conversation")
+        else:
+            evaluator_params = [
+                param.name
+                for param in inspect.signature(evaluator).parameters.values()
+                if param.default == inspect.Parameter.empty and param.name not in ["kwargs", "args", "self"]
+            ]
+
+            missing_inputs = [col for col in evaluator_params if col not in new_df.columns]
+
+        if missing_inputs:
+            missing_inputs_per_evaluator[evaluator_name] = missing_inputs
+
+    if missing_inputs_per_evaluator:
+        msg = "Some evaluators are missing required inputs:\n"
+        for evaluator_name, missing in missing_inputs_per_evaluator.items():
+            msg += f"- {evaluator_name}: {missing}\n"

+        # Add the additional notes
+        msg += "\nTo resolve this issue:\n"
+        msg += "- Ensure the data contains required inputs.\n"
+        if target is not None:
+            msg += "- Verify that the target is generating the necessary columns for the evaluators. "
+            msg += f"Currently generated columns: {target_generated_columns}\n"
+        msg += "- Check that the column mapping is correctly configured."
+
+        raise EvaluationException(
+            message=msg.strip(),
             target=ErrorTarget.EVALUATE,
             category=ErrorCategory.MISSING_FIELD,
             blame=ErrorBlame.USER_ERROR,
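The required-input check in _validate_columns_for_target is driven purely by signature introspection. A minimal, self-contained sketch of that logic, using a hypothetical my_target callable and a toy DataFrame (both invented for illustration, not part of this change):

import inspect
from typing import Dict

import pandas as pd


def my_target(query: str, context: str, temperature: float = 0.0) -> Dict[str, str]:
    # Hypothetical target: only 'query' and 'context' lack defaults.
    return {"response": f"{query} ({context}, t={temperature})"}


df = pd.DataFrame({"query": ["What is the capital of France?"]})

# Required inputs are the parameters without defaults, excluding kwargs/args/self.
required_inputs = [
    param.name
    for param in inspect.signature(my_target).parameters.values()
    if param.default == inspect.Parameter.empty and param.name not in ["kwargs", "args", "self"]
]
missing_inputs = [col for col in required_inputs if col not in df.columns]

print(required_inputs)  # ['query', 'context']
print(missing_inputs)   # ['context'] -> would raise "Missing required inputs for target: ['context']."

Parameters with defaults (temperature here) are not demanded from the data, which is why only default-less parameters end up in required_inputs.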
@@ -288,50 +381,6 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name):
     return initial_data_df


-def _validate_columns(
-    df: pd.DataFrame,
-    evaluators: Dict[str, Callable],
-    target: Optional[Callable],
-    column_mapping: Dict[str, Dict[str, str]],
-) -> None:
-    """
-    Check that all columns needed by evaluator or target function are present.
-
-    :param df: The data frame to be validated.
-    :type df: pd.DataFrame
-    :param evaluators: The dictionary of evaluators.
-    :type evaluators: Dict[str, Callable]
-    :param target: The callable to be applied to data set.
-    :type target: Optional[Callable]
-    :param column_mapping: Dictionary mapping evaluator name to evaluator column mapping
-    :type column_mapping: Dict[str, Dict[str, str]]
-    :raises EvaluationException: If column starts from "__outputs." while target is defined.
-    """
-    if target:
-        if any(c.startswith(Prefixes.TSG_OUTPUTS) for c in df.columns):
-            msg = "The column cannot start from " f'"{Prefixes.TSG_OUTPUTS}" if target was defined.'
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVALUATE,
-                category=ErrorCategory.INVALID_VALUE,
-                blame=ErrorBlame.USER_ERROR,
-            )
-        # If the target function is given, it may return
-        # several columns and hence we cannot check the availability of columns
-        # without knowing target function semantics.
-        # Instead, here we will validate the columns, taken by target.
-        _validate_input_data_for_evaluator(target, None, df, is_target_fn=True)
-    else:
-        for evaluator_name, evaluator in evaluators.items():
-            # Apply column mapping
-            mapping_config = column_mapping.get(evaluator_name, column_mapping.get("default", None))
-            new_df = _apply_column_mapping(df, mapping_config)
-
-            # Validate input data for evaluator
-            _validate_input_data_for_evaluator(evaluator, evaluator_name, new_df)
-
-
 def _apply_target_to_data(
     target: Callable,
     data: str,
@@ -604,7 +653,9 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
             for evaluator_name, evaluator_configuration in evaluator_config.items()
         }
     )
-    _validate_columns(input_data_df, evaluators, target, column_mapping)
+
+    if target is not None:
+        _validate_columns_for_target(input_data_df, target)

     # Target Run
     try:
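Before the target run starts, _validate_columns_for_target also rejects input data that already carries target-output columns. A rough sketch of that guard, assuming Prefixes.TSG_OUTPUTS is the "__outputs." prefix named in its docstring (the DataFrame is made up):

import pandas as pd

TSG_OUTPUTS = "__outputs."  # assumed value of Prefixes.TSG_OUTPUTS, per the docstring

df = pd.DataFrame({"query": ["hi"], "__outputs.response": ["stale value"]})

if any(c.startswith(TSG_OUTPUTS) for c in df.columns):
    # The real helper raises EvaluationException with ErrorCategory.INVALID_VALUE here.
    print(f'The column cannot start from "{TSG_OUTPUTS}" if target was defined.')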
@@ -639,6 +690,7 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     column_mapping.setdefault("default", {})

     # If target is set, apply 1-1 column mapping from target outputs to evaluator inputs
+    target_generated_columns: Set[str] = set()
     if data is not None and target is not None:
         input_data_df, target_generated_columns, target_run = _apply_target_to_data(
             target, data, pf_client, input_data_df, evaluation_name, _run_name=kwargs.get("_run_name")
@@ -656,9 +708,8 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
                 if col not in mapping and run_output not in mapped_to_values:
                     column_mapping[evaluator_name][col] = run_output  # pylint: disable=unnecessary-dict-index-lookup

-    # After we have generated all columns we can check if we have
-    # everything we need for evaluators.
-    _validate_columns(input_data_df, evaluators, target=None, column_mapping=column_mapping)
+    # After we have generated all columns, we can check if we have everything we need for evaluators.
+    _validate_columns_for_evaluators(input_data_df, evaluators, target, target_generated_columns, column_mapping)

     # Apply 1-1 mapping from input data to evaluator inputs, excluding values already assigned
     # via target mapping.
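For built-in evaluators, the _validate_columns_for_evaluators call above treats a "conversation" column as a substitute for the individual input fields. A minimal sketch of that branch, with a hypothetical evaluator and toy data (both invented for illustration):

import inspect

import pandas as pd


def relevance_evaluator(query: str = None, response: str = None, conversation: dict = None) -> dict:
    # Hypothetical built-in-style evaluator: accepts either (query, response) or a conversation.
    return {"relevance": 5.0}


new_df = pd.DataFrame({"conversation": [{"messages": []}]})

evaluator_params = [
    param.name
    for param in inspect.signature(relevance_evaluator).parameters.values()
    if param.name not in ["kwargs", "args", "self"]
]

if "conversation" in evaluator_params and "conversation" in new_df.columns:
    # A "conversation" column satisfies the evaluator, so per-field inputs are not required.
    missing_inputs = []
else:
    missing_inputs = [col for col in evaluator_params if col not in new_df.columns]
    # "conversation" itself is only reported as missing when it is the evaluator's sole parameter.
    if "conversation" in missing_inputs:
        if not (evaluator_params == ["conversation"] and missing_inputs == ["conversation"]):
            missing_inputs.remove("conversation")

print(missing_inputs)  # [] -> nothing to report for this evaluator

With per-field data instead (e.g., only a query column), the else branch would report ['response'] while still dropping 'conversation' from the missing list.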