diff --git a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
index 7878adf5c77..e2fc2b8066a 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
@@ -11,7 +11,6 @@
 import numpy as np
 
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import CoherenceEvaluator, FluencyEvaluator, GroundednessEvaluator, RelevanceEvaluator
 
 logger = logging.getLogger(__name__)
 
@@ -19,7 +18,7 @@
 
 class ChatEvaluator:
     def __init__(
-        self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
+        self, model_config, eval_last_turn: bool = False, parallel: bool = True
     ):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -64,7 +63,7 @@ def __init__(
             FluencyEvaluator(model_config),
         ]
 
-    def __call__(self, *, conversation: List[Dict], **kwargs):
+    def __call__(self, *, conversation, **kwargs):
         """Evaluates chat scenario.
 
         :param conversation: The conversation to be evaluated. Each turn should have "role" and "content" keys.
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
index 2fb81de63b0..023a52845d8 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
@@ -7,12 +7,11 @@
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class CoherenceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
index 6cb9467533f..323df141d2e 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class HateUnfairnessEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for hate unfairness score.
 
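For orientation, here is a minimal usage sketch of the chat evaluator after this change. The endpoint, key, and deployment values are placeholders, and the call shape simply follows the docstring above (turns as dicts with "role" and "content" keys); behaviour is unchanged, only the type annotations were dropped.

```python
# Minimal sketch, not part of the diff. Endpoint/key/deployment are placeholders.
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators.chat import ChatEvaluator

model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",
    api_key="<api-key>",
    azure_deployment="<chat-deployment>",
)

# The constructor still accepts an AzureOpenAIModelConfiguration; only the annotation is gone.
chat_eval = ChatEvaluator(model_config)

conversation = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "Paris is the capital of France."},
]
result = chat_eval(conversation=conversation)
```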
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
index bb01f05dd55..b2ff8554bfd 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class SelfHarmEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for self harm score.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
index fb0d3f79d71..b1247369cdc 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class SexualEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for sexual score.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
index 47382d2c330..29bc631c866 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class ViolenceEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for violence score.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
index 54300057cf0..4d8fc742c03 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
@@ -7,12 +7,11 @@
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class FluencyEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
 
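The content-safety evaluators all follow the same pattern: the Optional[TokenCredential] annotation is removed, but a credential can still be passed. A hedged sketch is below; the project_scope keys and the question/answer call signature are assumptions based on the rest of the package, not shown in this diff.

```python
# Hedged sketch, not part of the diff; project_scope keys and the call signature are assumptions.
from azure.identity import DefaultAzureCredential
from promptflow.evals.evaluators.content_safety import ViolenceEvaluator

project_scope = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<ai-project-name>",
}

# credential is still accepted; only the Optional[TokenCredential] annotation was dropped.
violence_eval = ViolenceEvaluator(project_scope, credential=DefaultAzureCredential())
result = violence_eval(question="What is the capital of France?", answer="Paris.")
```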
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
index f876a20c5bb..5023ee640cc 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
@@ -7,12 +7,11 @@
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class GroundednessEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
index f8d27ad2675..09955b6da95 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
@@ -4,7 +4,6 @@
 
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import (
     CoherenceEvaluator,
     F1ScoreEvaluator,
@@ -16,7 +15,7 @@
 
 
 class QAEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
index 95d93a67f89..6d1d89ad68a 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
@@ -7,12 +7,11 @@
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class RelevanceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
index 58f27d786c8..a36bd032a1f 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
@@ -7,12 +7,11 @@
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class SimilarityEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
 
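The composite QA evaluator changes in the same way. A short sketch of constructing it is below, assuming the usual question/answer/context/ground_truth inputs; those parameter names are not shown in this diff.

```python
# Sketch only; the __call__ parameter names are assumed, not shown in this diff.
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators.qa import QAEvaluator

model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",
    api_key="<api-key>",
    azure_deployment="<chat-deployment>",
)

qa_eval = QAEvaluator(model_config)
result = qa_eval(
    question="What is the capital of France?",
    answer="Paris is the capital of France.",
    context="France's capital city is Paris.",
    ground_truth="Paris",
)
```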
diff --git a/src/promptflow-evals/tests/evals/conftest.py b/src/promptflow-evals/tests/evals/conftest.py
index 88a91288f84..006048bc063 100644
--- a/src/promptflow-evals/tests/evals/conftest.py
+++ b/src/promptflow-evals/tests/evals/conftest.py
@@ -6,6 +6,7 @@
 import pytest
 from pytest_mock import MockerFixture
 
+from promptflow.client import PFClient
 from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.executor._line_execution_process_pool import _process_wrapper
 from promptflow.executor._process_manager import create_spawned_fork_process_manager
@@ -72,6 +73,12 @@ def model_config() -> dict:
     return model_config
 
 
+@pytest.fixture
+def pf_client() -> PFClient:
+    """The fixture returning a PFClient."""
+    return PFClient()
+
+
 # ==================== Recording injection ====================
 # To inject patches in subprocesses, add new mock method in setup_recording_injection_if_enabled
 # in fork mode, this is automatically enabled.
diff --git a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
new file mode 100644
index 00000000000..4d997dc18f2
--- /dev/null
+++ b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
@@ -0,0 +1,38 @@
+from typing import Any, List, Optional, Type
+
+import inspect
+import os
+import pytest
+
+from promptflow.evals import evaluators
+from promptflow.evals.evaluators import content_safety
+
+
+def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]:
+    evaluators = []
+    for name, obj in inspect.getmembers(namespace):
+        if inspect.isclass(obj):
+            if exceptions and name in exceptions:
+                continue
+            evaluators.append(obj)
+    return evaluators
+
+
+@pytest.mark.unittest
+class TestSaveEval:
+    """Test saving evaluators."""
+
+    EVALUATORS = get_evaluators_from_module(evaluators)
+    RAI_EVALUATORS = get_evaluators_from_module(content_safety)
+
+    @pytest.mark.parametrize('evaluator', EVALUATORS)
+    def test_save_evaluators(self, tmpdir, pf_client, evaluator) -> None:
+        """Test regular evaluator saving."""
+        pf_client.flows.save(evaluator, path=tmpdir)
+        assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml'))
+
+    @pytest.mark.parametrize('rai_evaluator', RAI_EVALUATORS)
+    def test_save_rai_evaluators(self, tmpdir, pf_client, rai_evaluator):
+        """Test saving of RAI evaluators."""
+        pf_client.flows.save(rai_evaluator, path=tmpdir)
+        assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml'))
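Outside pytest, the behaviour the new test exercises looks roughly like the sketch below. The round-trip through load_flow is an assumption about how the saved flex flow can be consumed and is not asserted by the test itself.

```python
# Standalone sketch of the save behaviour covered by the new test, not part of the diff.
import os
import tempfile

from promptflow.client import PFClient, load_flow
from promptflow.evals.evaluators import F1ScoreEvaluator

pf_client = PFClient()

with tempfile.TemporaryDirectory() as saved_path:
    # Saving an evaluator class writes a flex-flow definition into the target folder.
    pf_client.flows.save(F1ScoreEvaluator, path=saved_path)
    assert os.path.isfile(os.path.join(saved_path, "flow.flex.yaml"))

    # Assumption: the saved folder can be loaded back as a flow.
    saved_evaluator = load_flow(saved_path)
```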