Better validation for target
nick863 committed Apr 23, 2024
1 parent e7a93a6 · commit 94e4ea9
Showing 3 changed files with 41 additions and 3 deletions.
19 changes: 16 additions & 3 deletions src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -25,7 +25,7 @@ def _calculate_mean(df) -> Dict[str, float]:
     return mean_value.to_dict()


-def _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df):
+def _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df, is_target_fn=False):
     required_inputs = [
         param.name
         for param in inspect.signature(evaluator).parameters.values()
@@ -34,7 +34,10 @@ def _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df):

     missing_inputs = [col for col in required_inputs if col not in data_df.columns]
     if missing_inputs:
-        raise ValueError(f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}.")
+        if not is_target_fn:
+            raise ValueError(f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}.")
+        else:
+            raise ValueError(f"Missing required inputs for target : {missing_inputs}.")


 def _validation(target, data, evaluators, output_path, tracking_uri, evaluation_name):
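For readers who want the mechanics behind this check: required inputs are derived by inspecting the evaluator's call signature and comparing its parameters against the data columns. Below is a minimal, self-contained sketch of that pattern; the example evaluator, the filter on default values, and the sample data are illustrative assumptions, not code from this commit.

import inspect

import pandas as pd


def demo_evaluator(answer: str, ground_truth: str):
    # Hypothetical evaluator: neither parameter has a default, so both are required.
    return {"match": float(answer == ground_truth)}


# Assumed rule: parameters without default values are the required inputs.
required_inputs = [
    param.name
    for param in inspect.signature(demo_evaluator).parameters.values()
    if param.default is inspect.Parameter.empty
]
data_df = pd.DataFrame([{"question": "How long is flight from Earth to LV-426?", "answer": "About a year."}])
missing_inputs = [col for col in required_inputs if col not in data_df.columns]
print(missing_inputs)  # ['ground_truth'] -> _validate_input_data_for_evaluator would raise ValueError here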
@@ -65,16 +68,23 @@ def _validation(target, data, evaluators, output_path, tracking_uri, evaluation_
     if not isinstance(evaluation_name, str):
         raise ValueError("evaluation_name must be a string.")

+    _validate_columns(data, evaluators, target)
+
+
+def _validate_columns(data, evaluators, target):
     try:
         data_df = pd.read_json(data, lines=True)
     except Exception as e:
         raise ValueError(
             f"Failed to load data from {data}. Please validate it is a valid jsonl data. Error: {str(e)}.")

-    if not target:
+    if target:
+        # If a target function is given, it may return several columns, so we
+        # cannot check the availability of evaluator columns without knowing
+        # the target function's semantics. Instead, we validate here the
+        # columns consumed by the target.
+        _validate_input_data_for_evaluator(target, None, data_df, is_target_fn=True)
+    else:
         for evaluator_name, evaluator in evaluators.items():
             _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df)
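From the caller's point of view, the new branch means that when a target is supplied, validation first only checks whether the raw input data satisfies the target's parameters. A minimal usage sketch of how this surfaces; the import paths, the target body, and the file path are assumptions for illustration, and the unit test added below exercises the same path with the real _target_fn and the questions_wrong.jsonl fixture.

from promptflow.evals.evaluate import evaluate  # import path assumed
from promptflow.evals.evaluators import F1ScoreEvaluator  # import path assumed


def demo_target(question: str):
    # Hypothetical target: requires a "question" column, produces an "answer" column.
    return {"answer": f"Echo: {question}"}


# questions_wrong.jsonl only has a "request" column, so the target's inputs are
# reported as missing before anything is executed:
# ValueError: Missing required inputs for target : ['question'].
evaluate(
    data="data/questions_wrong.jsonl",  # assumed local path to the fixture
    evaluators={"g": F1ScoreEvaluator()},
    target=demo_target,
)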

@@ -151,6 +161,9 @@ def evaluate(
     tempfile_created = False
     if data is not None and target is not None:
         data = _apply_target_to_data(target, data, pf_client)
+        # After we have generated all columns we can check if we have
+        # everything we need for evaluators.
+        _validate_columns(data, evaluators, None)
         tempfile_created = True

     evaluator_info = {}
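This second _validate_columns call (with target=None) covers the case where the target runs but does not produce every column the evaluators need. A rough sketch of that failure mode, using made-up data and imitating the effect of _apply_target_to_data rather than calling it:

import pandas as pd

# Data as it would be loaded from questions.jsonl (column name taken from the tests).
data_df = pd.DataFrame([{"question": "How long is flight from Earth to LV-426?"}])

# Pretend the target only produced an "answer" column for each row.
data_df["answer"] = "About a year at sublight speed."

# F1ScoreEvaluator also needs "ground_truth", so the post-target
# _validate_columns(data, evaluators, None) call reports it as missing
# before any evaluator is run.
print(sorted(data_df.columns))  # ['answer', 'question'] -- no 'ground_truth'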
3 changes: 3 additions & 0 deletions src/promptflow-evals/tests/evals/unittests/data/questions_wrong.jsonl
@@ -0,0 +1,3 @@
{"request":"How long is flight from Earth to LV-426?"}
{"request":"Why there is no central heating on the street?"}
{"request":"Why these questions are so strange?"}
22 changes: 22 additions & 0 deletions src/promptflow-evals/tests/evals/unittests/test_evaluate.py
@@ -31,6 +31,12 @@ def questions_file():
     return os.path.join(data_path, "questions.jsonl")


+@pytest.fixture
+def questions_wrong_file():
+    data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
+    return os.path.join(data_path, "questions_wrong.jsonl")
+
+
 @pytest.fixture
 def questions_answers_file():
     data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
@@ -90,6 +96,22 @@ def test_evaluate_missing_required_inputs(self, missing_columns_jsonl_file):

         assert "Missing required inputs for evaluator g : ['ground_truth']." in exc_info.value.args[0]

+    def test_evaluate_missing_required_inputs_target(self, questions_wrong_file):
+        with pytest.raises(ValueError) as exc_info:
+            evaluate(data=questions_wrong_file,
+                     evaluators={"g": F1ScoreEvaluator()},
+                     target=_target_fn
+                     )
+        assert "Missing required inputs for target : ['question']." in exc_info.value.args[0]
+
+    def test_wrong_target(self, questions_file):
+        """Test error when the target function does not generate a required column."""
+        with pytest.raises(ValueError) as exc_info:
+            # _target_fn will generate the "answer", but not the ground truth.
+            evaluate(data=questions_file, evaluators={"g": F1ScoreEvaluator()}, target=_target_fn)
+
+        assert "Missing required inputs for evaluator g : ['ground_truth']." in exc_info.value.args[0]
+
     @pytest.mark.parametrize('script_is_file', [True, False])
     def test_save_fun_as_flow(self, tmpdir, pf_client, script_is_file):
         """Test saving function as flow."""
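Note that _target_fn is defined elsewhere in the test module and is not part of this diff. For orientation only, a plausible shape consistent with how these tests use it (an assumption, not the actual implementation):

def _target_fn(question: str):
    # Hypothetical stand-in: consumes "question" and returns an "answer",
    # but never a "ground_truth" column -- exactly what test_wrong_target relies on.
    return {"answer": f"Stub answer for: {question}"}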
