Better validation for target
nick863 committed Apr 23, 2024
1 parent e7a93a6 · commit 94e4ea9
Showing 3 changed files with 41 additions and 3 deletions.
19 changes: 16 additions & 3 deletions src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -25,7 +25,7 @@ def _calculate_mean(df) -> Dict[str, float]:
     return mean_value.to_dict()


-def _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df):
+def _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df, is_target_fn=False):
     required_inputs = [
         param.name
         for param in inspect.signature(evaluator).parameters.values()
@@ -34,7 +34,10 @@ def _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df):

     missing_inputs = [col for col in required_inputs if col not in data_df.columns]
     if missing_inputs:
-        raise ValueError(f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}.")
+        if not is_target_fn:
+            raise ValueError(f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}.")
+        else:
+            raise ValueError(f"Missing required inputs for target : {missing_inputs}.")


 def _validation(target, data, evaluators, output_path, tracking_uri, evaluation_name):
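For readers who want the mechanics behind this check: required inputs are derived by inspecting the evaluator's call signature and comparing its parameters against the data columns. Below is a minimal, self-contained sketch of that pattern; the example evaluator, the filter on default values, and the sample data are illustrative assumptions, not code from this commit.

import inspect

import pandas as pd


def demo_evaluator(answer: str, ground_truth: str):
    # Hypothetical evaluator: neither parameter has a default, so both are required.
    return {"match": float(answer == ground_truth)}


# Assumed rule: parameters without default values are the required inputs.
required_inputs = [
    param.name
    for param in inspect.signature(demo_evaluator).parameters.values()
    if param.default is inspect.Parameter.empty
]
data_df = pd.DataFrame([{"question": "How long is flight from Earth to LV-426?", "answer": "About a year."}])
missing_inputs = [col for col in required_inputs if col not in data_df.columns]
print(missing_inputs)  # ['ground_truth'] -> _validate_input_data_for_evaluator would raise ValueError here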
@@ -65,16 +68,23 @@ def _validation(target, data, evaluators, output_path, tracking_uri, evaluation_
     if not isinstance(evaluation_name, str):
         raise ValueError("evaluation_name must be a string.")

+    _validate_columns(data, evaluators, target)
+
+
+def _validate_columns(data, evaluators, target):
     try:
         data_df = pd.read_json(data, lines=True)
     except Exception as e:
         raise ValueError(
             f"Failed to load data from {data}. Please validate it is a valid jsonl data. Error: {str(e)}.")

-    if not target:
+    if target:
+        # If a target function is given, it may return several columns, so we
+        # cannot check the availability of evaluator columns without knowing
+        # the target function's semantics. Instead, we validate here the
+        # columns consumed by the target.
+        _validate_input_data_for_evaluator(target, None, data_df, is_target_fn=True)
+    else:
         for evaluator_name, evaluator in evaluators.items():
             _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df)
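From the caller's point of view, the new branch means that when a target is supplied, validation first only checks whether the raw input data satisfies the target's parameters. A minimal usage sketch of how this surfaces; the import paths, the target body, and the file path are assumptions for illustration, and the unit test added below exercises the same path with the real _target_fn and the questions_wrong.jsonl fixture.

from promptflow.evals.evaluate import evaluate  # import path assumed
from promptflow.evals.evaluators import F1ScoreEvaluator  # import path assumed


def demo_target(question: str):
    # Hypothetical target: requires a "question" column, produces an "answer" column.
    return {"answer": f"Echo: {question}"}


# questions_wrong.jsonl only has a "request" column, so the target's inputs are
# reported as missing before anything is executed:
# ValueError: Missing required inputs for target : ['question'].
evaluate(
    data="data/questions_wrong.jsonl",  # assumed local path to the fixture
    evaluators={"g": F1ScoreEvaluator()},
    target=demo_target,
)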

@@ -151,6 +161,9 @@ def evaluate(
     tempfile_created = False
     if data is not None and target is not None:
         data = _apply_target_to_data(target, data, pf_client)
+        # After we have generated all columns we can check if we have
+        # everything we need for evaluators.
+        _validate_columns(data, evaluators, None)
         tempfile_created = True

     evaluator_info = {}
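This second _validate_columns call (with target=None) covers the case where the target runs but does not produce every column the evaluators need. A rough sketch of that failure mode, using made-up data and imitating the effect of _apply_target_to_data rather than calling it:

import pandas as pd

# Data as it would be loaded from questions.jsonl (column name taken from the tests).
data_df = pd.DataFrame([{"question": "How long is flight from Earth to LV-426?"}])

# Pretend the target only produced an "answer" column for each row.
data_df["answer"] = "About a year at sublight speed."

# F1ScoreEvaluator also needs "ground_truth", so the post-target
# _validate_columns(data, evaluators, None) call reports it as missing
# before any evaluator is run.
print(sorted(data_df.columns))  # ['answer', 'question'] -- no 'ground_truth'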
3 changes: 3 additions & 0 deletions src/promptflow-evals/tests/evals/unittests/data/questions_wrong.jsonl
@@ -0,0 +1,3 @@
{"request":"How long is flight from Earth to LV-426?"}
{"request":"Why there is no central heating on the street?"}
{"request":"Why these questions are so strange?"}
22 changes: 22 additions & 0 deletions src/promptflow-evals/tests/evals/unittests/test_evaluate.py
@@ -31,6 +31,12 @@ def questions_file():
     return os.path.join(data_path, "questions.jsonl")


+@pytest.fixture
+def questions_wrong_file():
+    data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
+    return os.path.join(data_path, "questions_wrong.jsonl")
+
+
 @pytest.fixture
 def questions_answers_file():
     data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
@@ -90,6 +96,22 @@ def test_evaluate_missing_required_inputs(self, missing_columns_jsonl_file):

         assert "Missing required inputs for evaluator g : ['ground_truth']." in exc_info.value.args[0]

+    def test_evaluate_missing_required_inputs_target(self, questions_wrong_file):
+        with pytest.raises(ValueError) as exc_info:
+            evaluate(data=questions_wrong_file,
+                     evaluators={"g": F1ScoreEvaluator()},
+                     target=_target_fn
+                     )
+        assert "Missing required inputs for target : ['question']." in exc_info.value.args[0]
+
+    def test_wrong_target(self, questions_file):
+        """Test error when the target function does not generate a required column."""
+        with pytest.raises(ValueError) as exc_info:
+            # _target_fn will generate the "answer", but not the ground truth.
+            evaluate(data=questions_file, evaluators={"g": F1ScoreEvaluator()}, target=_target_fn)
+
+        assert "Missing required inputs for evaluator g : ['ground_truth']." in exc_info.value.args[0]
+
     @pytest.mark.parametrize('script_is_file', [True, False])
     def test_save_fun_as_flow(self, tmpdir, pf_client, script_is_file):
         """Test saving function as flow."""
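Note that _target_fn is defined elsewhere in the test module and is not part of this diff. For orientation only, a plausible shape consistent with how these tests use it (an assumption, not the actual implementation):

def _target_fn(question: str):
    # Hypothetical stand-in: consumes "question" and returns an "answer",
    # but never a "ground_truth" column -- exactly what test_wrong_target relies on.
    return {"answer": f"Stub answer for: {question}"}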
