From c1a386a196cbd630bab2a423144c51db0f468f5f Mon Sep 17 00:00:00 2001
From: nick863 <30440255+nick863@users.noreply.github.com>
Date: Tue, 16 Apr 2024 16:37:46 -0700
Subject: [PATCH 1/6] Add test to check for evaluator saving

---
 .../evals/evaluators/chat/__init__.py         |  4 +-
 .../evals/evaluators/coherence/__init__.py    |  4 +-
 .../evals/evaluators/fluency/__init__.py      |  4 +-
 .../evals/evaluators/groundedness/__init__.py |  4 +-
 .../evals/evaluators/relevance/__init__.py    |  4 +-
 .../evals/evaluators/similarity/__init__.py   |  4 +-
 .../tests/evals/unittests/test_save_eval.py   | 41 +++++++++++++++++++
 7 files changed, 58 insertions(+), 7 deletions(-)
 create mode 100644 src/promptflow-evals/tests/evals/unittests/test_save_eval.py

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
index 7878adf5c77..cae4f9ce10d 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
@@ -7,7 +7,7 @@
 import json
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Dict, List
+from typing import Dict, List, Union
 
 import numpy as np
 
@@ -19,7 +19,7 @@ class ChatEvaluator:
     def __init__(
-        self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
+        self, model_config: Union[Dict, AzureOpenAIModelConfiguration], eval_last_turn: bool = False, parallel: bool = True
     ):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
index 2fb81de63b0..ac8f539fdb7 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
@@ -4,6 +4,8 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
+from typing import Union, Dict
+
 from pathlib import Path
 
 from promptflow.client import load_flow
@@ -12,7 +14,7 @@ class CoherenceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
index 54300057cf0..9c0e202d393 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
@@ -4,6 +4,8 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
+from typing import Union, Dict
+
 from pathlib import Path
 
 from promptflow.client import load_flow
@@ -12,7 +14,7 @@ class FluencyEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
index f876a20c5bb..f84e3cf3498 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
@@ -4,6 +4,8 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
+from typing import Union, Dict
+
 from pathlib import Path
 
 from promptflow.client import load_flow
@@ -12,7 +14,7 @@ class GroundednessEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
index 95d93a67f89..fde6f299ab4 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
@@ -4,6 +4,8 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
+from typing import Union, Dict
+
 from pathlib import Path
 
 from promptflow.client import load_flow
@@ -12,7 +14,7 @@ class RelevanceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
index 58f27d786c8..0d762c68e0d 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
@@ -4,6 +4,8 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
+from typing import Union, Dict
+
 from pathlib import Path
 
 from promptflow.client import load_flow
@@ -12,7 +14,7 @@ class SimilarityEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
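
Note on the changes above: with the hint widened to Union[Dict, AzureOpenAIModelConfiguration], each evaluator accepts either a configuration object or a plain dict. A minimal sketch of both styles follows; the exact configuration field names are an assumption for illustration, not taken from this patch:

    from promptflow.core import AzureOpenAIModelConfiguration
    from promptflow.evals.evaluators import CoherenceEvaluator

    # Configuration-object style; the field names shown here are assumed.
    model_config = AzureOpenAIModelConfiguration(
        azure_endpoint="https://<resource>.openai.azure.com",
        api_key="<api-key>",
        azure_deployment="<deployment-name>",
    )
    coherence = CoherenceEvaluator(model_config)

    # Plain-dict style, now allowed by the widened hint.
    coherence_from_dict = CoherenceEvaluator({
        "azure_endpoint": "https://<resource>.openai.azure.com",
        "api_key": "<api-key>",
        "azure_deployment": "<deployment-name>",
    })
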
diff --git a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
new file mode 100644
index 00000000000..35a53f10f35
--- /dev/null
+++ b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
@@ -0,0 +1,41 @@
+from typing import Any, List, Optional
+
+import inspect
+import os
+import tempfile
+import unittest
+from promptflow.client import PFClient
+from promptflow.evals import evaluators
+from promptflow.evals.evaluators import content_safety
+
+
+class TestSaveEval(unittest.TestCase):
+    """Test saving evaluators."""
+    def setUp(self) -> None:
+        self.pf = PFClient()
+        unittest.TestCase.setUp(self)
+
+    def _do_test_saving(self,
+                        namespace: Any,
+                        exceptions: Optional[List[str]] = None) -> None:
+        """Do the actual test on saving evaluators."""
+        for name, obj in inspect.getmembers(namespace):
+            if inspect.isclass(obj):
+                if exceptions and name in exceptions:
+                    continue
+                with tempfile.TemporaryDirectory() as d:
+                    self.pf.flows.save(obj)
+                    self.assertTrue(os.path.isfile(os.path.join(d, 'flow.flex.yaml')))
+
+    def test_save_evaluators(self) -> None:
+        """Test regular evaluator saving."""
+        self._do_test_saving(evaluators, ['ChatEvaluator'])
+
+    @unittest.skip('RAI models constructor contains credentials, which is not supported.')
+    def test_save_rai_evaluators(self):
+        """Test saving of RAI evaluators"""
+        self._do_test_saving(content_safety)
+
+
+if __name__ == "__main__":
+    unittest.main()

From a4fa72975a396ee68584a58ca8fe03239cb4cf20 Mon Sep 17 00:00:00 2001
From: nick863 <30440255+nick863@users.noreply.github.com>
Date: Tue, 16 Apr 2024 17:21:04 -0700
Subject: [PATCH 2/6] Remove annotations

---
 .../promptflow/evals/evaluators/chat/__init__.py          | 2 +-
 .../promptflow/evals/evaluators/coherence/__init__.py     | 2 +-
 .../promptflow/evals/evaluators/fluency/__init__.py       | 2 +-
 .../promptflow/evals/evaluators/groundedness/__init__.py  | 2 +-
 src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py | 2 +-
 .../promptflow/evals/evaluators/relevance/__init__.py     | 2 +-
 .../promptflow/evals/evaluators/similarity/__init__.py    | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
index cae4f9ce10d..3d683d1c455 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
@@ -19,7 +19,7 @@ class ChatEvaluator:
     def __init__(
-        self, model_config: Union[Dict, AzureOpenAIModelConfiguration], eval_last_turn: bool = False, parallel: bool = True
+        self, model_config, eval_last_turn: bool = False, parallel: bool = True
     ):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
index ac8f539fdb7..fcb6f4f4725 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
@@ -14,7 +14,7 @@ class CoherenceEvaluator:
-    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
index 9c0e202d393..d0a9b6e137c 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
@@ -14,7 +14,7 @@ class FluencyEvaluator:
-    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
index f84e3cf3498..d2ddb4c55a6 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
@@ -14,7 +14,7 @@ class GroundednessEvaluator:
-    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
index f8d27ad2675..a218b5405aa 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
@@ -16,7 +16,7 @@ class QAEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
index fde6f299ab4..20d79b2941f 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
@@ -14,7 +14,7 @@ class RelevanceEvaluator:
-    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
index 0d762c68e0d..0eb819c02d8 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
@@ -14,7 +14,7 @@ class SimilarityEvaluator:
-    def __init__(self, model_config: Union[Dict, AzureOpenAIModelConfiguration]):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.

From 2f2808d6d4480b73e265e1aaf0caa5b555aa703a Mon Sep 17 00:00:00 2001
From: nick863 <30440255+nick863@users.noreply.github.com>
Date: Tue, 16 Apr 2024 17:54:30 -0700
Subject: [PATCH 3/6] Remove types as a temporary solution to save flows.
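
Saving an evaluator as a flex flow appears to require that every __init__
parameter map to a port type that can be written into flow.flex.yaml; a
class-valued hint such as AzureOpenAIModelConfiguration has no obvious
mapping, so the hints are dropped for now and model_config is declared with
the generic "object" type in the YAML added below. A small illustrative
sketch of the signature inspection the saver presumably relies on (not part
of this change):

    import inspect

    from promptflow.evals.evaluators import CoherenceEvaluator

    # With the hint removed the parameter carries no annotation, so a saver
    # inspecting the signature can fall back to the generic "object" port type.
    params = inspect.signature(CoherenceEvaluator.__init__).parameters
    print(params["model_config"].annotation is inspect.Parameter.empty)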
---
 .../promptflow/evals/evaluators/chat/__init__.py          | 3 +--
 .../promptflow/evals/evaluators/coherence/__init__.py     | 3 ---
 .../promptflow/evals/evaluators/coherence/flow.flex.yaml  | 9 +++++++++
 .../promptflow/evals/evaluators/fluency/__init__.py       | 3 ---
 .../promptflow/evals/evaluators/groundedness/__init__.py  | 3 ---
 .../promptflow/evals/evaluators/qa/__init__.py            | 1 -
 .../promptflow/evals/evaluators/relevance/__init__.py     | 3 ---
 .../promptflow/evals/evaluators/similarity/__init__.py    | 3 ---
 .../tests/evals/unittests/test_save_eval.py               | 3 ++-
 9 files changed, 12 insertions(+), 19 deletions(-)
 create mode 100644 src/promptflow-evals/promptflow/evals/evaluators/coherence/flow.flex.yaml

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
index 3d683d1c455..2d62d491d39 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
@@ -7,11 +7,10 @@
 import json
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Dict, List, Union
+from typing import Dict, List
 
 import numpy as np
 
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import CoherenceEvaluator, FluencyEvaluator, GroundednessEvaluator, RelevanceEvaluator
 
 logger = logging.getLogger(__name__)

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
index fcb6f4f4725..023a52845d8 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
@@ -4,12 +4,9 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
-from typing import Union, Dict
-
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/coherence/flow.flex.yaml b/src/promptflow-evals/promptflow/evals/evaluators/coherence/flow.flex.yaml
new file mode 100644
index 00000000000..0ea2c8bb5c2
--- /dev/null
+++ b/src/promptflow-evals/promptflow/evals/evaluators/coherence/flow.flex.yaml
@@ -0,0 +1,9 @@
+inputs:
+  question:
+    type: string
+  answer:
+    type: string
+init:
+  model_config:
+    type: object
+entry: __init__:CoherenceEvaluator

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
index d0a9b6e137c..4d8fc742c03 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
@@ -4,12 +4,9 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
-from typing import Union, Dict
-
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
index d2ddb4c55a6..5023ee640cc 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
@@ -4,12 +4,9 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
-from typing import Union, Dict
-
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
index a218b5405aa..09955b6da95 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
@@ -4,7 +4,6 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import (
     CoherenceEvaluator,
     F1ScoreEvaluator,

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
index 20d79b2941f..6d1d89ad68a 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
@@ -4,12 +4,9 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
-from typing import Union, Dict
-
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
index 0eb819c02d8..a36bd032a1f 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
@@ -4,12 +4,9 @@
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
-from typing import Union, Dict
-
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection

diff --git a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
index 35a53f10f35..7a82b1d123b 100644
--- a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
+++ b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
@@ -11,6 +11,7 @@ class TestSaveEval(unittest.TestCase):
     """Test saving evaluators."""
+
     def setUp(self) -> None:
         self.pf = PFClient()
         unittest.TestCase.setUp(self)
@@ -24,7 +25,7 @@ def _do_test_saving(self,
                 if exceptions and name in exceptions:
                     continue
                 with tempfile.TemporaryDirectory() as d:
-                    self.pf.flows.save(obj)
+                    self.pf.flows.save(obj, path=d)
                     self.assertTrue(os.path.isfile(os.path.join(d, 'flow.flex.yaml')))
 
     def test_save_evaluators(self) -> None:

From 8c9f7dd0abb49614383680aca25f96685e14ab75 Mon Sep 17 00:00:00 2001
From: nick863 <30440255+nick863@users.noreply.github.com>
Date: Wed, 17 Apr 2024 10:39:16 -0700
Subject: [PATCH 4/6] Fixes

---
 .../evals/evaluators/chat/__init__.py         |  2 +-
 .../evals/evaluators/coherence/flow.flex.yaml |  9 ----
 .../content_safety/hate_unfairness.py         |  4 +-
 .../evaluators/content_safety/self_harm.py    |  4 +-
 .../evals/evaluators/content_safety/sexual.py |  4 +-
 .../evaluators/content_safety/violence.py     |  4 +-
 src/promptflow-evals/tests/evals/conftest.py  |  7 +++
 .../tests/evals/unittests/test_save_eval.py   | 46 +++++++++----------
 8 files changed, 33 insertions(+), 47 deletions(-)
 delete mode 100644 src/promptflow-evals/promptflow/evals/evaluators/coherence/flow.flex.yaml

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
index 2d62d491d39..e2fc2b8066a 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
@@ -63,7 +63,7 @@ def __init__(
             FluencyEvaluator(model_config),
         ]
 
-    def __call__(self, *, conversation: List[Dict], **kwargs):
+    def __call__(self, *, conversation, **kwargs):
         """Evaluates chat scenario.
 
         :param conversation: The conversation to be evaluated. Each turn should have "role" and "content" keys.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/coherence/flow.flex.yaml b/src/promptflow-evals/promptflow/evals/evaluators/coherence/flow.flex.yaml
deleted file mode 100644
index 0ea2c8bb5c2..00000000000
--- a/src/promptflow-evals/promptflow/evals/evaluators/coherence/flow.flex.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-inputs:
-  question:
-    type: string
-  answer:
-    type: string
-init:
-  model_config:
-    type: object
-entry: __init__:CoherenceEvaluator

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
index 6cb9467533f..323df141d2e 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class HateUnfairnessEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for hate unfairness score.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
index bb01f05dd55..b2ff8554bfd 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class SelfHarmEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for self harm score.
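
For reference, a hedged sketch of constructing and calling one of these content-safety evaluators after the change; the project_scope keys and the question/answer call signature are assumptions based on the surrounding code, not spelled out in this patch:

    from azure.identity import DefaultAzureCredential
    from promptflow.evals.evaluators.content_safety import SelfHarmEvaluator

    # Assumed Azure AI project coordinates; replace with real values.
    project_scope = {
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group>",
        "project_name": "<ai-project-name>",
    }
    # credential now defaults to None; a TokenCredential such as
    # DefaultAzureCredential can still be passed even without the type hint.
    self_harm_eval = SelfHarmEvaluator(project_scope, credential=DefaultAzureCredential())
    result = self_harm_eval(question="What is the capital of France?", answer="Paris.")
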
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
index fb0d3f79d71..b1247369cdc 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class SexualEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for sexual score.

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
index 47382d2c330..29bc631c866 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class ViolenceEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for violence score.

diff --git a/src/promptflow-evals/tests/evals/conftest.py b/src/promptflow-evals/tests/evals/conftest.py
index 88a91288f84..006048bc063 100644
--- a/src/promptflow-evals/tests/evals/conftest.py
+++ b/src/promptflow-evals/tests/evals/conftest.py
@@ -6,6 +6,7 @@
 import pytest
 from pytest_mock import MockerFixture
 
+from promptflow.client import PFClient
 from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.executor._line_execution_process_pool import _process_wrapper
 from promptflow.executor._process_manager import create_spawned_fork_process_manager
@@ -72,6 +73,12 @@ def model_config() -> dict:
     return model_config
 
 
+@pytest.fixture
+def pf_client() -> PFClient:
+    """The fixture returning PFClient."""
+    return PFClient()
+
+
 # ==================== Recording injection ====================
 # To inject patches in subprocesses, add new mock method in setup_recording_injection_if_enabled
 # in fork mode, this is automatically enabled.

diff --git a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
index 7a82b1d123b..0a1a3280e9b 100644
diff --git a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py index 7a82b1d123b..0a1a3280e9b 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py +++ b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py @@ -1,42 +1,38 @@ -from typing import Any, List, Optional +from typing import Any, List, Optional, Type import inspect import os -import tempfile -import unittest -from promptflow.client import PFClient +import pytest + from promptflow.evals import evaluators from promptflow.evals.evaluators import content_safety -class TestSaveEval(unittest.TestCase): +@pytest.mark.unittest +class TestSaveEval: """Test saving evaluators.""" - def setUp(self) -> None: - self.pf = PFClient() - unittest.TestCase.setUp(self) - - def _do_test_saving(self, - namespace: Any, - exceptions: Optional[List[str]] = None) -> None: - """Do the actual test on saving evaluators.""" + @staticmethod + def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]: + evaluators = [] for name, obj in inspect.getmembers(namespace): if inspect.isclass(obj): if exceptions and name in exceptions: continue - with tempfile.TemporaryDirectory() as d: - self.pf.flows.save(obj, path=d) - self.assertTrue(os.path.isfile(os.path.join(d, 'flow.flex.yaml'))) + evaluators.append(obj) + return evaluators + + EVALUATORS = get_evaluators_from_module(evaluators) + RAI_EVALUATORS = get_evaluators_from_module(content_safety) - def test_save_evaluators(self) -> None: + @pytest.mark.parametrize('evaluator', EVALUATORS) + def test_save_evaluators(self, tmpdir, pf_client, evaluator) -> None: """Test regular evaluator saving.""" - self._do_test_saving(evaluators, ['ChatEvaluator']) + pf_client.flows.save(evaluator, path=tmpdir) + assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml')) - @unittest.skip('RAI models constructor contains credentials, which is not supported.') - def test_save_rai_evaluators(self): + @pytest.mark.parametrize('rai_evaluator', RAI_EVALUATORS) + def test_save_rai_evaluators(self, tmpdir, pf_client, rai_evaluator): """Test saving of RAI evaluators""" - self._do_test_saving(content_safety) - - -if __name__ == "__main__": - unittest.main() + pf_client.flows.save(rai_evaluator, path=tmpdir) + assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml')) From 4af8c578fc1dc7de5e63f70104c97b883dee7c3c Mon Sep 17 00:00:00 2001 From: nick863 <30440255+nick863@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:45:43 -0700 Subject: [PATCH 5/6] Make staticmethod private --- .../tests/evals/unittests/test_save_eval.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py index 0a1a3280e9b..1ab43dd1b17 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py +++ b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py @@ -13,7 +13,7 @@ class TestSaveEval: """Test saving evaluators.""" @staticmethod - def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]: + def _get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]: evaluators = [] for name, obj in inspect.getmembers(namespace): if inspect.isclass(obj): @@ -22,8 +22,8 @@ def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = evaluators.append(obj) return evaluators - 
-    EVALUATORS = get_evaluators_from_module(evaluators)
-    RAI_EVALUATORS = get_evaluators_from_module(content_safety)
+    EVALUATORS = _get_evaluators_from_module(evaluators)
+    RAI_EVALUATORS = _get_evaluators_from_module(content_safety)
 
     @pytest.mark.parametrize('evaluator', EVALUATORS)
     def test_save_evaluators(self, tmpdir, pf_client, evaluator) -> None:

From 22e02a0c6dfdf7e7204531bc17b1c163155756c8 Mon Sep 17 00:00:00 2001
From: nick863 <30440255+nick863@users.noreply.github.com>
Date: Wed, 17 Apr 2024 11:03:01 -0700
Subject: [PATCH 6/6] Fix unit test

---
 .../tests/evals/unittests/test_save_eval.py | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
index 1ab43dd1b17..4d997dc18f2 100644
--- a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
+++ b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
@@ -8,22 +8,22 @@
 from promptflow.evals.evaluators import content_safety
 
 
+def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]:
+    evaluators = []
+    for name, obj in inspect.getmembers(namespace):
+        if inspect.isclass(obj):
+            if exceptions and name in exceptions:
+                continue
+            evaluators.append(obj)
+    return evaluators
+
+
 @pytest.mark.unittest
 class TestSaveEval:
     """Test saving evaluators."""
 
-    @staticmethod
-    def _get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]:
-        evaluators = []
-        for name, obj in inspect.getmembers(namespace):
-            if inspect.isclass(obj):
-                if exceptions and name in exceptions:
-                    continue
-                evaluators.append(obj)
-        return evaluators
-
-    EVALUATORS = _get_evaluators_from_module(evaluators)
-    RAI_EVALUATORS = _get_evaluators_from_module(content_safety)
+    EVALUATORS = get_evaluators_from_module(evaluators)
+    RAI_EVALUATORS = get_evaluators_from_module(content_safety)
 
     @pytest.mark.parametrize('evaluator', EVALUATORS)
     def test_save_evaluators(self, tmpdir, pf_client, evaluator) -> None:
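
In its final state the test collects every evaluator class exposed by promptflow.evals.evaluators and promptflow.evals.evaluators.content_safety and saves each one as a flex flow. A minimal standalone sketch of the same save-and-verify pattern the parametrized tests exercise (the output directory name here is arbitrary):

    import os

    from promptflow.client import PFClient
    from promptflow.evals.evaluators import FluencyEvaluator

    pf = PFClient()
    # Saving a callable class writes a flow.flex.yaml describing its entry
    # point, inputs and init parameters into the target directory.
    pf.flows.save(FluencyEvaluator, path="./saved_fluency")
    assert os.path.isfile(os.path.join("./saved_fluency", "flow.flex.yaml"))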