Convert prompty based evaluators to async based implementation (#3557)
# Description

Convert the prompty-based built-in evaluators (coherence, groundedness, relevance, and similarity) to an async implementation. Each evaluator now owns a private `_Async*Evaluator` that loads its prompty via `AsyncPrompty.load`; the public synchronous `__call__` delegates to it through `async_run_allowing_running_loop`, and a new `_to_async()` method exposes the async evaluator directly.
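
In effect, each evaluator is reshaped into the pattern sketched below. This is a minimal illustration rather than code from this PR: `_AsyncFooEvaluator`, `FooEvaluator`, and `foo.prompty` are placeholder names, and the input validation and score parsing done by the real evaluators are omitted.

```python
# Hedged sketch of the sync-wraps-async pattern this PR applies to each evaluator.
# Placeholder names: _AsyncFooEvaluator / FooEvaluator / foo.prompty are illustrative only.
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration


class _AsyncFooEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # Load the prompty definition as an awaitable flow.
        self._flow = AsyncPrompty.load(source="foo.prompty", model={"configuration": model_config})

    async def __call__(self, *, question: str, answer: str, **kwargs):
        # The real evaluators also validate inputs and parse a numeric score here.
        return await self._flow(question=question, answer=answer)


class FooEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        self._async_evaluator = _AsyncFooEvaluator(model_config)

    def __call__(self, *, question: str, answer: str, **kwargs):
        # Synchronous entry point; works whether or not an event loop is already running.
        return async_run_allowing_running_loop(self._async_evaluator, question=question, answer=answer, **kwargs)

    def _to_async(self):
        return self._async_evaluator
```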

# All Promptflow Contribution checklist:
- [ ] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [ ] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **I confirm that all new dependencies are compatible with the MIT
license.**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [ ] Title of the pull request is clear and informative.
- [ ] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [ ] Pull request includes test coverage for the included changes.
ninghu authored Jul 18, 2024
1 parent cac471a commit de5aa0f
Showing 16 changed files with 658 additions and 2,372 deletions.

@@ -7,14 +7,48 @@

import numpy as np

from promptflow.client import load_flow
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration

try:
    from ..._user_agent import USER_AGENT
except ImportError:
    USER_AGENT = None


class _AsyncCoherenceEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update(
            {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
        ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "coherence.prompty")
        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)

    async def __call__(self, *, question: str, answer: str, **kwargs):
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")

        if not (question.strip() and answer.strip()):
            raise ValueError("Both 'question' and 'answer' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = await self._flow(question=question, answer=answer)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_coherence": float(score)}


class CoherenceEvaluator:
    """
    Initialize a coherence evaluator configured for a specific Azure OpenAI model.
@@ -41,18 +75,7 @@ class CoherenceEvaluator:
    """

    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # TODO: Remove this block once the bug is fixed
        # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
            if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None

        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "coherence.prompty")
        self._flow = load_flow(source=prompty_path, model=prompty_model_config)
        self._async_evaluator = _AsyncCoherenceEvaluator(model_config)

    def __call__(self, *, question: str, answer: str, **kwargs):
        """
@@ -65,21 +88,7 @@ def __call__(self, *, question: str, answer: str, **kwargs):
        :return: The coherence score.
        :rtype: dict
        """
        return async_run_allowing_running_loop(self._async_evaluator, question=question, answer=answer, **kwargs)

        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")

        if not (question.strip() and answer.strip()):
            raise ValueError("Both 'question' and 'answer' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = self._flow(question=question, answer=answer)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_coherence": float(score)}
    def _to_async(self):
        return self._async_evaluator

@@ -7,14 +7,48 @@

import numpy as np

from promptflow.client import load_flow
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration

try:
    from ..._user_agent import USER_AGENT
except ImportError:
    USER_AGENT = None


class _AsyncGroundednessEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update(
            {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
        ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "groundedness.prompty")
        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)

    async def __call__(self, *, answer: str, context: str, **kwargs):
        # Validate input parameters
        answer = str(answer or "")
        context = str(context or "")

        if not (answer.strip()) or not (context.strip()):
            raise ValueError("Both 'answer' and 'context' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = await self._flow(answer=answer, context=context)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_groundedness": float(score)}


class GroundednessEvaluator:
    """
    Initialize a groundedness evaluator configured for a specific Azure OpenAI model.
@@ -42,19 +76,7 @@ class GroundednessEvaluator:
    """

    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # TODO: Remove this block once the bug is fixed
        # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}

        prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
            if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None

        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "groundedness.prompty")
        self._flow = load_flow(source=prompty_path, model=prompty_model_config)
        self._async_evaluator = _AsyncGroundednessEvaluator(model_config)

    def __call__(self, *, answer: str, context: str, **kwargs):
        """
@@ -67,20 +89,7 @@ def __call__(self, *, answer: str, context: str, **kwargs):
        :return: The groundedness score.
        :rtype: dict
        """
        # Validate input parameters
        answer = str(answer or "")
        context = str(context or "")
        return async_run_allowing_running_loop(self._async_evaluator, answer=answer, context=context, **kwargs)

        if not (answer.strip()) or not (context.strip()):
            raise ValueError("Both 'answer' and 'context' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = self._flow(answer=answer, context=context)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_groundedness": float(score)}
    def _to_async(self):
        return self._async_evaluator

@@ -7,14 +7,49 @@

import numpy as np

from promptflow.client import load_flow
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration

try:
    from ..._user_agent import USER_AGENT
except ImportError:
    USER_AGENT = None


class _AsyncRelevanceEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update(
            {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
        ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "relevance.prompty")
        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)

    async def __call__(self, *, question: str, answer: str, context: str, **kwargs):
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")
        context = str(context or "")

        if not (question.strip() and answer.strip() and context.strip()):
            raise ValueError("'question', 'answer' and 'context' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = await self._flow(question=question, answer=answer, context=context)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_relevance": float(score)}


class RelevanceEvaluator:
    """
    Initialize a relevance evaluator configured for a specific Azure OpenAI model.
@@ -43,21 +78,7 @@ class RelevanceEvaluator:
    """

    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # TODO: Remove this block once the bug is fixed
        # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {
            "configuration": model_config,
        }

        prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
            if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None

        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "relevance.prompty")
        self._flow = load_flow(source=prompty_path, model=prompty_model_config)
        self._async_evaluator = _AsyncRelevanceEvaluator(model_config)

    def __call__(self, *, question: str, answer: str, context: str, **kwargs):
        """
@@ -72,21 +93,9 @@ def __call__(self, *, question: str, answer: str, context: str, **kwargs):
        :return: The relevance score.
        :rtype: dict
        """
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")
        context = str(context or "")
        return async_run_allowing_running_loop(
            self._async_evaluator, question=question, answer=answer, context=context, **kwargs
        )

        if not (question.strip() and answer.strip() and context.strip()):
            raise ValueError("'question', 'answer' and 'context' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = self._flow(question=question, answer=answer, context=context)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_relevance": float(score)}
    def _to_async(self):
        return self._async_evaluator

@@ -7,14 +7,49 @@

import numpy as np

from promptflow.client import load_flow
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow._utils.async_utils import async_run_allowing_running_loop
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration

try:
    from ..._user_agent import USER_AGENT
except ImportError:
    USER_AGENT = None


class _AsyncSimilarityEvaluator:
    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update(
            {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
        ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "similarity.prompty")
        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)

    async def __call__(self, *, question: str, answer: str, ground_truth: str, **kwargs):
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")
        ground_truth = str(ground_truth or "")

        if not (question.strip() and answer.strip() and ground_truth.strip()):
            raise ValueError("'question', 'answer' and 'ground_truth' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = await self._flow(question=question, answer=answer, ground_truth=ground_truth)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_similarity": float(score)}


class SimilarityEvaluator:
    """
    Initialize a similarity evaluator configured for a specific Azure OpenAI model.
@@ -42,17 +77,7 @@ class SimilarityEvaluator:
    """

    def __init__(self, model_config: AzureOpenAIModelConfiguration):
        # TODO: Remove this block once the bug is fixed
        # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
        if model_config.api_version is None:
            model_config.api_version = "2024-02-15-preview"

        prompty_model_config = {"configuration": model_config}
        prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
            if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
        current_dir = os.path.dirname(__file__)
        prompty_path = os.path.join(current_dir, "similarity.prompty")
        self._flow = load_flow(source=prompty_path, model=prompty_model_config)
        self._async_evaluator = _AsyncSimilarityEvaluator(model_config)

    def __call__(self, *, question: str, answer: str, ground_truth: str, **kwargs):
        """
@@ -67,21 +92,9 @@ def __call__(self, *, question: str, answer: str, ground_truth: str, **kwargs):
        :return: The similarity score.
        :rtype: dict
        """
        # Validate input parameters
        question = str(question or "")
        answer = str(answer or "")
        ground_truth = str(ground_truth or "")
        return async_run_allowing_running_loop(
            self._async_evaluator, question=question, answer=answer, ground_truth=ground_truth, **kwargs
        )

        if not (question.strip() and answer.strip() and ground_truth.strip()):
            raise ValueError("'question', 'answer' and 'ground_truth' must be non-empty strings.")

        # Run the evaluation flow
        llm_output = self._flow(question=question, answer=answer, ground_truth=ground_truth)

        score = np.nan
        if llm_output:
            match = re.search(r"\d", llm_output)
            if match:
                score = float(match.group())

        return {"gpt_similarity": float(score)}
    def _to_async(self):
        return self._async_evaluator
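
For reference, a usage sketch of the evaluators after this change. The `promptflow.evals.evaluators` import path and the `AzureOpenAIModelConfiguration` field names are assumed from typical promptflow-evals usage rather than taken from this diff, and the endpoint, key, and deployment values are placeholders.

```python
import asyncio

from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators import CoherenceEvaluator  # assumed public import path

# Placeholder configuration values; substitute real ones.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",
    api_key="<api-key>",
    azure_deployment="<deployment-name>",
)

evaluator = CoherenceEvaluator(model_config)

# Synchronous call: internally delegates to the async evaluator.
print(evaluator(question="What is the capital of France?", answer="Paris is the capital of France."))
# -> e.g. {"gpt_coherence": 5.0}


# Direct async use via the private _to_async() hook added in this PR.
async def main():
    async_evaluator = evaluator._to_async()
    return await async_evaluator(question="What is the capital of France?", answer="Paris.")


print(asyncio.run(main()))
```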