Convert prompty based evaluators to async based implementation (#3557)
# Description

Convert the prompty-based built-in evaluators (coherence, groundedness, relevance, and similarity) to an async implementation. Each evaluator now owns a private `_Async*Evaluator` that loads its prompty via `AsyncPrompty.load`; the public synchronous `__call__` delegates to it through `async_run_allowing_running_loop`, and a new `_to_async()` method exposes the async evaluator directly.
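
In effect, each evaluator is reshaped into the pattern sketched below. This is a minimal illustration rather than code from this PR: `_AsyncFooEvaluator`, `FooEvaluator`, and `foo.prompty` are placeholder names, and the input validation and score parsing done by the real evaluators are omitted.

```python
# Hedged sketch of the sync-wraps-async pattern this PR applies to each evaluator.
# Placeholder names: _AsyncFooEvaluator / FooEvaluator / foo.prompty are illustrative only.
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration


class _AsyncFooEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # Load the prompty definition as an awaitable flow.
        self._flow = AsyncPrompty.load(source="foo.prompty", model={"configuration": model_config})

    async def __call__(self, *, question: str, answer: str, **kwargs):
        # The real evaluators also validate inputs and parse a numeric score here.
        return await self._flow(question=question, answer=answer)


class FooEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        self._async_evaluator = _AsyncFooEvaluator(model_config)

    def __call__(self, *, question: str, answer: str, **kwargs):
        # Synchronous entry point; works whether or not an event loop is already running.
        return async_run_allowing_running_loop(self._async_evaluator, question=question, answer=answer, **kwargs)

    def _to_async(self):
        return self._async_evaluator
```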

# All Promptflow Contribution checklist:
- [ ] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [ ] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **I confirm that all new dependencies are compatible with the MIT
license.**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [ ] Title of the pull request is clear and informative.
- [ ] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [ ] Pull request includes test coverage for the included changes.
ninghu authored Jul 18, 2024
1 parent cac471a commit de5aa0f
Showing 16 changed files with 658 additions and 2,372 deletions.

@@ -7,14 +7,48 @@

import numpy as np

from promptflow.client import load_flow
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration

try:
    from ..._user_agent import USER_AGENT
except ImportError:
    USER_AGENT = None


class _AsyncCoherenceEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update(
            {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
        ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "coherence.prompty")
        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)

    async def __call__(self, *, question: str, answer: str, **kwargs):
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")

        if not (question.strip() and answer.strip()):
            raise ValueError("Both 'question' and 'answer' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = await self._flow(question=question, answer=answer)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_coherence": float(score)}


class CoherenceEvaluator:
    """
    Initialize a coherence evaluator configured for a specific Azure OpenAI model.
@@ -41,18 +75,7 @@ class CoherenceEvaluator:
    """

    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # TODO: Remove this block once the bug is fixed
        # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
            if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None

        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "coherence.prompty")
        self._flow = load_flow(source=prompty_path, model=prompty_model_config)
        self._async_evaluator = _AsyncCoherenceEvaluator(model_config)

    def __call__(self, *, question: str, answer: str, **kwargs):
        """
@@ -65,21 +88,7 @@ def __call__(self, *, question: str, answer: str, **kwargs):
        :return: The coherence score.
        :rtype: dict
        """
        return async_run_allowing_running_loop(self._async_evaluator, question=question, answer=answer, **kwargs)

        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")

        if not (question.strip() and answer.strip()):
            raise ValueError("Both 'question' and 'answer' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = self._flow(question=question, answer=answer)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_coherence": float(score)}
    def _to_async(self):
        return self._async_evaluator

@@ -7,14 +7,48 @@

import numpy as np

from promptflow.client import load_flow
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration

try:
    from ..._user_agent import USER_AGENT
except ImportError:
    USER_AGENT = None


class _AsyncGroundednessEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update(
            {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
        ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "groundedness.prompty")
        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)

    async def __call__(self, *, answer: str, context: str, **kwargs):
        # Validate input parameters
        answer = str(answer or "")
        context = str(context or "")

        if not (answer.strip()) or not (context.strip()):
            raise ValueError("Both 'answer' and 'context' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = await self._flow(answer=answer, context=context)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_groundedness": float(score)}


class GroundednessEvaluator:
    """
    Initialize a groundedness evaluator configured for a specific Azure OpenAI model.
@@ -42,19 +76,7 @@ class GroundednessEvaluator:
    """

    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # TODO: Remove this block once the bug is fixed
        # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}

        prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
            if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None

        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "groundedness.prompty")
        self._flow = load_flow(source=prompty_path, model=prompty_model_config)
        self._async_evaluator = _AsyncGroundednessEvaluator(model_config)

    def __call__(self, *, answer: str, context: str, **kwargs):
        """
@@ -67,20 +89,7 @@ def __call__(self, *, answer: str, context: str, **kwargs):
        :return: The groundedness score.
        :rtype: dict
        """
        # Validate input parameters
        answer = str(answer or "")
        context = str(context or "")
        return async_run_allowing_running_loop(self._async_evaluator, answer=answer, context=context, **kwargs)

        if not (answer.strip()) or not (context.strip()):
            raise ValueError("Both 'answer' and 'context' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = self._flow(answer=answer, context=context)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_groundedness": float(score)}
    def _to_async(self):
        return self._async_evaluator

@@ -7,14 +7,49 @@

import numpy as np

from promptflow.client import load_flow
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration

try:
    from ..._user_agent import USER_AGENT
except ImportError:
    USER_AGENT = None


class _AsyncRelevanceEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update(
            {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
        ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "relevance.prompty")
        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)

    async def __call__(self, *, question: str, answer: str, context: str, **kwargs):
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")
        context = str(context or "")

        if not (question.strip() and answer.strip() and context.strip()):
            raise ValueError("'question', 'answer' and 'context' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = await self._flow(question=question, answer=answer, context=context)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_relevance": float(score)}


class RelevanceEvaluator:
    """
    Initialize a relevance evaluator configured for a specific Azure OpenAI model.
@@ -43,21 +78,7 @@ class RelevanceEvaluator:
    """

    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # TODO: Remove this block once the bug is fixed
        # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {
            "configuration": model_config,
        }

        prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
            if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None

        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "relevance.prompty")
        self._flow = load_flow(source=prompty_path, model=prompty_model_config)
        self._async_evaluator = _AsyncRelevanceEvaluator(model_config)

    def __call__(self, *, question: str, answer: str, context: str, **kwargs):
        """
@@ -72,21 +93,9 @@ def __call__(self, *, question: str, answer: str, context: str, **kwargs):
        :return: The relevance score.
        :rtype: dict
        """
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")
        context = str(context or "")
        return async_run_allowing_running_loop(
            self._async_evaluator, question=question, answer=answer, context=context, **kwargs
        )

        if not (question.strip() and answer.strip() and context.strip()):
            raise ValueError("'question', 'answer' and 'context' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = self._flow(question=question, answer=answer, context=context)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_relevance": float(score)}
    def _to_async(self):
        return self._async_evaluator

@@ -7,14 +7,49 @@

import numpy as np

from promptflow.client import load_flow
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration

try:
    from ..._user_agent import USER_AGENT
except ImportError:
    USER_AGENT = None


class _AsyncSimilarityEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update(
            {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
        ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "similarity.prompty")
        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)

    async def __call__(self, *, question: str, answer: str, ground_truth: str, **kwargs):
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")
        ground_truth = str(ground_truth or "")

        if not (question.strip() and answer.strip() and ground_truth.strip()):
            raise ValueError("'question', 'answer' and 'ground_truth' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = await self._flow(question=question, answer=answer, ground_truth=ground_truth)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_similarity": float(score)}


class SimilarityEvaluator:
    """
    Initialize a similarity evaluator configured for a specific Azure OpenAI model.
@@ -42,17 +77,7 @@ class SimilarityEvaluator:
    """

    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # TODO: Remove this block once the bug is fixed
        # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
            if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "similarity.prompty")
        self._flow = load_flow(source=prompty_path, model=prompty_model_config)
        self._async_evaluator = _AsyncSimilarityEvaluator(model_config)

    def __call__(self, *, question: str, answer: str, ground_truth: str, **kwargs):
        """
@@ -67,21 +92,9 @@ def __call__(self, *, question: str, answer: str, ground_truth: str, **kwargs):
        :return: The similarity score.
        :rtype: dict
        """
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")
        ground_truth = str(ground_truth or "")
        return async_run_allowing_running_loop(
            self._async_evaluator, question=question, answer=answer, ground_truth=ground_truth, **kwargs
        )

        if not (question.strip() and answer.strip() and ground_truth.strip()):
            raise ValueError("'question', 'answer' and 'ground_truth' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = self._flow(question=question, answer=answer, ground_truth=ground_truth)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_similarity": float(score)}
    def _to_async(self):
        return self._async_evaluator
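
For reference, a usage sketch of the evaluators after this change. The `promptflow.evals.evaluators` import path and the `AzureOpenAIModelConfiguration` field names are assumed from typical promptflow-evals usage rather than taken from this diff, and the endpoint, key, and deployment values are placeholders.

```python
import asyncio

from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators import CoherenceEvaluator  # assumed public import path

# Placeholder configuration values; substitute real ones.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",
    api_key="<api-key>",
    azure_deployment="<deployment-name>",
)

evaluator = CoherenceEvaluator(model_config)

# Synchronous call: internally delegates to the async evaluator.
print(evaluator(question="What is the capital of France?", answer="Paris is the capital of France."))
# -> e.g. {"gpt_coherence": 5.0}


# Direct async use via the private _to_async() hook added in this PR.
async def main():
    async_evaluator = evaluator._to_async()
    return await async_evaluator(question="What is the capital of France?", answer="Paris.")


print(asyncio.run(main()))
```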