Commit 8a1d6c6

initial change

1 parent d614feb commit 8a1d6c6

File tree

6 files changed: +164 -168 lines changed

src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py

+39 -11
@@ -2,15 +2,13 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import inspect
-from types import FunctionType
+import re
 from typing import Callable, Dict, Optional
 
 import pandas as pd
 
 from promptflow.client import PFClient
 
-from ._code_client import CodeClient
-
 
 def _calculate_mean(df) -> Dict[str, float]:
     df.rename(columns={col: col.replace("outputs.", "") for col in df.columns}, inplace=True)
@@ -30,7 +28,7 @@ def _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df):
         raise ValueError(f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}.")
 
 
-def _validation(target, data, evaluators, output_path, tracking_uri, evaluation_name):
+def _validation(target, data, evaluators, output_path, tracking_uri, evaluation_name, evaluator_config):
    if data is None:
        raise ValueError("data must be provided for evaluation.")
 
@@ -64,7 +62,41 @@ def _validation(target, data, evaluators, output_path, tracking_uri, evaluation_
        raise ValueError(f"Failed to load data from {data}. Please validate it is a valid jsonl data. Error: {str(e)}.")
 
    for evaluator_name, evaluator in evaluators.items():
-        _validate_input_data_for_evaluator(evaluator, evaluator_name, data_df)
+        # Apply column mapping
+        mapping_config = evaluator_config.get(evaluator_name, evaluator_config.get("default", None))
+        renamed_data_df = _apply_column_mapping(data_df, "data", mapping_config)
+
+        # Validate input data for evaluator
+        _validate_input_data_for_evaluator(evaluator, evaluator_name, renamed_data_df)
+
+
+def _apply_column_mapping(source_df, source_name, mapping_config, inplace=False):
+    SUPPORTED_SOURCE_NAMES = ["data", "target"]
+
+    result_df = source_df
+    if mapping_config:
+        column_mapping = {}
+        for map_to_key, map_value in mapping_config.items():
+            match = re.search(r"^\${([^{}]+)}$", map_value)
+
+            if match is not None:
+                pattern = match.group(1)
+
+                # Check if source reference is valid
+                source_reference = pattern.split(".")[0]
+                if source_reference not in SUPPORTED_SOURCE_NAMES:
+                    raise ValueError(
+                        f"'{source_reference}' is not a valid source reference. "
+                        + f"It should be one of {SUPPORTED_SOURCE_NAMES}."
+                    )
+
+                if pattern.startswith(f"{source_name}."):
+                    map_from_key = pattern.split(f"{source_name}.")[1]
+                    column_mapping[map_from_key] = map_to_key
+
+        result_df = source_df.rename(columns=column_mapping, inplace=inplace)
+
+    return result_df
 
 
 def evaluate(
@@ -97,18 +129,14 @@ def evaluate(
    :rtype: ~azure.ai.generative.evaluate.EvaluationResult
    """
 
-    _validation(target, data, evaluators, output_path, tracking_uri, evaluation_name)
+    _validation(target, data, evaluators, output_path, tracking_uri, evaluation_name, evaluator_config)
 
    pf_client = PFClient()
-    code_client = CodeClient()
 
    evaluator_info = {}
 
    for evaluator_name, evaluator in evaluators.items():
-        if isinstance(evaluator, FunctionType):
-            evaluator_info.update({evaluator_name: {"client": pf_client, "evaluator": evaluator}})
-        else:
-            evaluator_info.update({evaluator_name: {"client": code_client, "evaluator": evaluator}})
+        evaluator_info.update({evaluator_name: {"client": pf_client, "evaluator": evaluator}})
 
        evaluator_info[evaluator_name]["run"] = evaluator_info[evaluator_name]["client"].run(
            flow=evaluator,
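The centerpiece of this diff is the new _apply_column_mapping helper. A minimal usage sketch, not part of the commit: it assumes the helper is importable from the module this file defines (promptflow.evals.evaluate._evaluate, a private module) and exercises the rename and error paths defined above.

import pandas as pd

from promptflow.evals.evaluate._evaluate import _apply_column_mapping  # private helper added above

# A data frame standing in for the loaded jsonl data.
df = pd.DataFrame({"question": ["What is the capital of France?"],
                   "ground_truth": ["Paris is the capital of France."]})

# Evaluator input name -> "${data.<column>}" reference, as in evaluator_config.
mapping_config = {"answer": "${data.ground_truth}"}

# "data.ground_truth" matches the ^\${([^{}]+)}$ pattern and its source
# reference "data" is supported, so "ground_truth" is renamed to "answer".
renamed = _apply_column_mapping(df, "data", mapping_config)
print(list(renamed.columns))  # ['question', 'answer']

# An unsupported source reference raises:
# _apply_column_mapping(df, "data", {"answer": "${foo.bar}"})  # ValueError

Note the asymmetry in the config: the key is the name the evaluator expects, the value is where that input comes from in the source data.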
src/promptflow-evals/samples/evaluate_test_data.jsonl

+3 -3

@@ -1,3 +1,3 @@
-{"question":"How do you create a run?","context":"AML API only","answer":"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment."}
-{"question":"How do you log a model?","context":"Logging can be done using any OSS Sdk","answer":"There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = './outputs/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`."}
-{"question":"What is the capital of France?","context":"France is in Europe","answer":"Paris is the capital of France."}
+{"question": "What is the capital of France?", "context": "France is in Europe", "answer": "Paris is the capital of France.", "ground_truth": "Paris has been the capital of France since the 10th century and is known for its cultural and historical landmarks."}
+{"question": "Who developed the theory of relativity?", "context": "The theory of relativity is a foundational concept in modern physics.", "answer": "Albert Einstein developed the theory of relativity.", "ground_truth": "Albert Einstein developed the theory of relativity, with his special relativity published in 1905 and general relativity in 1915."}
+{"question": "What is the speed of light?", "context": "Light travels at a constant speed in a vacuum.", "answer": "The speed of light is approximately 299,792,458 meters per second.", "ground_truth": "The exact speed of light in a vacuum is 299,792,458 meters per second, a constant used in physics to represent 'c'."}

src/promptflow-evals/samples/evaluation.py

+34 -154
@@ -9,40 +9,6 @@
 from promptflow.evals.evaluators.content_safety import ViolenceEvaluator
 
 
-def built_in_evaluator():
-    # Initialize Azure OpenAI Model Configuration
-    model_config = AzureOpenAIModelConfiguration(
-        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
-        api_key=os.environ.get("AZURE_OPENAI_KEY"),
-        azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
-    )
-
-    # Initialzing Relevance Evaluator
-    relevance_eval = RelevanceEvaluator(model_config)
-
-    # Running Relevance Evaluator on single input row
-    relevance_score = relevance_eval(
-        answer="The Alpine Explorer Tent is the most waterproof.",
-        context="From the our product list, the alpine explorer tent is the most waterproof. The Adventure Dining "
-        "Table has higher weight.",
-    )
-
-    pprint(relevance_score)
-
-
-def content_safety_evaluator():
-    # Initialize Project Scope
-    project_scope = {
-        "subscription_id": "e0fd569c-e34a-4249-8c24-e8d723c7f054",
-        "resource_group_name": "rg-test",
-        "project_name": "project-test",
-    }
-
-    violence_eval = ViolenceEvaluator(project_scope)
-    violence_score = violence_eval(question="What is the capital of France?", answer="Paris.")
-    pprint(violence_score)
-
-
 def answer_length(answer, **kwargs):
    return {"value": len(answer)}
 
@@ -95,136 +61,50 @@ def answer_length(answer, **kwargs):
 
    # Using multiple evaluators together using `Evaluate` API
 
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "evaluate_test_data.jsonl")
    result = evaluate(
-        data="evaluate_test_data.jsonl",
+        data=path,
        evaluators={
            "answer_length": answer_length,
-            "violence": violence_eval,
+            "relevance": relevance_eval,
+        },
+        evaluator_config={
+            "answer_length": {"answer": "${data.answer}"},
+            "relevance": {"answer": "${data.ground_truth}"},
        },
    )
 
    pprint(result)
-
    """
-    {'metrics': {'outputs.answer_length.value': 879.3333333333334,
-                 'outputs.violence.violence_score': 0.0},
-     'rows': [{'inputs.answer': 'To create a run using the Azure Machine Learning '
-                                'API, you first need to create an Experiment. Once '
-                                'you have an experiment, you can create a Run '
-                                'object that is associated with that experiment. '
-                                'Here is some Python code that demonstrates this '
-                                'process:\n'
-                                '\n'
-                                '```\n'
-                                'from azureml.core import Experiment, Run\n'
-                                'from azureml.core.workspace import Workspace\n'
-                                '\n'
-                                '# Define workspace and experiment\n'
-                                'ws = Workspace.from_config()\n'
-                                'exp = Experiment(workspace=ws, '
-                                "name='my_experiment')\n"
-                                '\n'
-                                '# Create a new run\n'
-                                'run = exp.start_logging()\n'
-                                '```\n'
-                                '\n'
-                                'In this code, the `from_config()` method reads '
-                                'the configuration file that you created when you '
-                                'set up your Azure Machine Learning workspace. The '
-                                '`Experiment` constructor creates an Experiment '
-                                'object that is associated with your workspace, '
-                                'and the `start_logging()` method creates a new '
-                                'Run object that is associated with the '
-                                'Experiment. Now you can use the `run` object to '
-                                'log metrics, upload files, and track other '
-                                'information related to your machine learning '
-                                'experiment.',
-               'inputs.context': 'AML API only',
-               'inputs.question': 'How do you create a run?',
-               'outputs.answer_length.value': 981,
-               'outputs.violence.violence': 'Safe',
-               'outputs.violence.violence_reason': "The system's response is "
-                                                   'purely technical and '
-                                                   'educational, providing '
-                                                   'information on how to use the '
-                                                   'Azure Machine Learning API. '
-                                                   'There is no mention or '
-                                                   'implication of violence in the '
-                                                   'message.',
-               'outputs.violence.violence_score': 0.0},
-              {'inputs.answer': 'There are a few ways to log models in Azure '
-                                'Machine Learning. \n'
-                                '\n'
-                                'One way is to use the `register_model()` method '
-                                'of the `Run` object. The `register_model()` '
-                                'method logs a model file in the Azure Machine '
-                                'Learning service workspace and makes it available '
-                                "for deployment. Here's an example:\n"
-                                '\n'
-                                '```python\n'
-                                'from azureml.core import Model\n'
-                                '\n'
-                                "model_path = './outputs/my_model.pkl'\n"
-                                'model = Model.register(workspace=ws, '
-                                "model_path=model_path, model_name='my_model')\n"
-                                '```\n'
-                                '\n'
-                                'This code registers the model file located at '
-                                '`model_path` to the Azure Machine Learning '
-                                'service workspace with the name `my_model`. \n'
-                                '\n'
-                                'Another way to log a model is to save it as an '
-                                'output of a `Run`. If your model generation code '
-                                'is part of a script or Jupyter notebook that runs '
-                                'as an Azure Machine Learning experiment, you can '
-                                'save the model file as an output of the `Run` '
-                                "object. Here's an example:\n"
-                                '\n'
-                                '```python\n'
-                                'from sklearn.linear_model import '
-                                'LogisticRegression\n'
-                                'from azureml.core.run import Run\n'
-                                '\n'
-                                '# Initialize a run object\n'
-                                'run = Run.get_context()\n'
-                                '\n'
-                                '# Train your model\n'
-                                'X_train, y_train = ...\n'
-                                'clf = LogisticRegression().fit(X_train, y_train)\n'
-                                '\n'
-                                "# Save the model to the Run object's outputs "
-                                'directory\n'
-                                "model_path = 'outputs/model.pkl'\n"
-                                'joblib.dump(value=clf, filename=model_path)\n'
-                                '\n'
-                                '# Log the model as a run artifact\n'
-                                'run.upload_file(name=model_path, '
-                                'path_or_stream=model_path)\n'
-                                '```\n'
-                                '\n'
-                                'In this code, `Run.get_context()` retrieves the '
-                                'current run context object, which you can use to '
-                                'track metadata and metrics for the run. After '
-                                'training your model, you can use `joblib.dump()` '
-                                'to save the model to a file, and then log the '
-                                'file as an artifact of the run using '
-                                '`run.upload_file()`.',
-               'inputs.context': 'Logging can be done using any OSS Sdk',
-               'inputs.question': 'How do you log a model?',
-               'outputs.answer_length.value': 1626,
-               'outputs.violence.violence': nan,
-               'outputs.violence.violence_reason': nan,
-               'outputs.violence.violence_score': nan},
-              {'inputs.answer': 'Paris is the capital of France.',
+    {'metrics': {'answer_length.value': 49.333333333333336,
+                 'relevance.gpt_relevance': 5.0},
+     'rows': [{'inputs.answer': 'Paris is the capital of France.',
              'inputs.context': 'France is in Europe',
+              'inputs.ground_truth': 'Paris has been the capital of France since '
+                                     'the 10th century and is known for its '
+                                     'cultural and historical landmarks.',
              'inputs.question': 'What is the capital of France?',
              'outputs.answer_length.value': 31,
-              'outputs.violence.violence': 'Safe',
-              'outputs.violence.violence_reason': "The system's response is a "
-                                                  'straightforward factual answer '
-                                                  'to a geography question, '
-                                                  'containing no violent language '
-                                                  'or content.',
-              'outputs.violence.violence_score': 0.0}],
+              'outputs.relevance.gpt_relevance': 5},
+             {'inputs.answer': 'Albert Einstein developed the theory of '
+                               'relativity.',
+              'inputs.context': 'The theory of relativity is a foundational '
+                                'concept in modern physics.',
+              'inputs.ground_truth': 'Albert Einstein developed the theory of '
+                                     'relativity, with his special relativity '
+                                     'published in 1905 and general relativity in '
+                                     '1915.',
+              'inputs.question': 'Who developed the theory of relativity?',
+              'outputs.answer_length.value': 51,
+              'outputs.relevance.gpt_relevance': 5},
+             {'inputs.answer': 'The speed of light is approximately 299,792,458 '
+                               'meters per second.',
+              'inputs.context': 'Light travels at a constant speed in a vacuum.',
+              'inputs.ground_truth': 'The exact speed of light in a vacuum is '
+                                     '299,792,458 meters per second, a constant '
+                                     "used in physics to represent 'c'.",
+              'inputs.question': 'What is the speed of light?',
+              'outputs.answer_length.value': 66,
+              'outputs.relevance.gpt_relevance': 5}],
     'traces': {}}
    """
@@ -0,0 +1,3 @@
+{"request": "What is the capital of France?", "context": "France is in Europe", "response": "Paris is the capital of France.", "ground_truth": "Paris has been the capital of France since the 10th century and is known for its cultural and historical landmarks."}
+{"request": "Who developed the theory of relativity?", "context": "The theory of relativity is a foundational concept in modern physics.", "response": "Albert Einstein developed the theory of relativity.", "ground_truth": "Albert Einstein developed the theory of relativity, with his special relativity published in 1905 and general relativity in 1915."}
+{"request": "What is the speed of light?", "context": "Light travels at a constant speed in a vacuum.", "response": "The speed of light is approximately 299,792,458 meters per second.", "ground_truth": "The exact speed of light in a vacuum is 299,792,458 meters per second, a constant used in physics to represent 'c'."}
