
Commit

fix the test pipelines
ninghu committed Apr 16, 2024
1 parent a2680c9 commit 752b7c2
Showing 4 changed files with 31 additions and 22 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/promptflow-evals-e2e-test.yml
@@ -75,8 +75,7 @@ jobs:
run: poetry install
working-directory: ${{ env.RECORD_DIRECTORY }}
- name: generate end-to-end test config from secret
-# TODO: replace with evals secret
-run: echo '${{ secrets.PF_TRACING_E2E_TEST_CONFIG }}' >> connections.json
+run: echo '${{ secrets.PF_EVALS_E2E_TEST_CONFIG }}' >> connections.json
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: run e2e tests
run: poetry run pytest -m e2etest --cov=promptflow --cov-config=pyproject.toml --cov-report=term --cov-report=html --cov-report=xml
10 changes: 10 additions & 0 deletions src/promptflow-evals/tests/evals/conftest.py
@@ -44,6 +44,16 @@ def pytest_configure():
pytest.is_in_ci_pipeline = is_in_ci_pipeline()


@pytest.fixture
def mock_model_config() -> dict:
return AzureOpenAIModelConfiguration(
azure_endpoint="aoai-api-endpoint",
api_key="aoai-api-key",
api_version="2023-07-01-preview",
azure_deployment="aoai-deployment",
)


@pytest.fixture
def model_config() -> dict:
conn_name = "azure_openai_model_config"
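Note: the `mock_model_config` fixture added above lets the unit tests construct evaluators from placeholder Azure OpenAI settings instead of the real `azure_openai_model_config` connection. Below is a minimal sketch of how a unit test can consume it, mirroring the pattern used in the changed test files; the test name and final assertion are illustrative, not part of this commit.

```python
# Sketch only: a hypothetical unit test consuming the mock_model_config fixture.
import pytest

from promptflow.evals.evaluators import ChatEvaluator


@pytest.mark.unittest
def test_chat_evaluator_accepts_mock_config(mock_model_config):
    # Build the evaluator from placeholder settings; clearing the sub-evaluator
    # lists keeps the test from ever calling an Azure OpenAI endpoint.
    chat_eval = ChatEvaluator(model_config=mock_model_config)
    chat_eval._non_rag_evaluators = []
    chat_eval._rag_evaluators = []
    assert chat_eval is not None
```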
22 changes: 11 additions & 11 deletions src/promptflow-evals/tests/evals/unittests/test_chat_evaluator.py
@@ -3,10 +3,10 @@
from promptflow.evals.evaluators import ChatEvaluator


@pytest.mark.usefixtures("model_config")
@pytest.mark.usefixtures("mock_model_config")
@pytest.mark.unittest
class TestChatEvaluator:
-def test_conversation_validation_normal(self, model_config):
+def test_conversation_validation_normal(self, mock_model_config):
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{
@@ -33,57 +33,57 @@ def test_conversation_validation_normal(self, model_config):
},
]

-chat_eval = ChatEvaluator(model_config=model_config)
+chat_eval = ChatEvaluator(model_config=mock_model_config)
chat_eval._non_rag_evaluators = []
chat_eval._rag_evaluators = []

chat_eval(conversation=conversation)

-def test_conversation_validation_missing_role(self, model_config):
+def test_conversation_validation_missing_role(self, mock_model_config):
conversation = [
{"role": "user", "content": "question 1"},
{"content": "answer 1"},
]

-chat_eval = ChatEvaluator(model_config=model_config)
+chat_eval = ChatEvaluator(model_config=mock_model_config)
chat_eval._non_rag_evaluators = []
chat_eval._rag_evaluators = []

with pytest.raises(ValueError) as e:
chat_eval(conversation=conversation)
assert str(e.value) == "Each turn in 'conversation' must have 'role' and 'content' keys. Turn number: 2"

-def test_conversation_validation_question_answer_not_paired(self, model_config):
+def test_conversation_validation_question_answer_not_paired(self, mock_model_config):
conversation = [
{"role": "user", "content": "question 1"},
{"role": "assistant", "content": "answer 1"},
{"role": "assistant", "content": "answer 2"},
]

-chat_eval = ChatEvaluator(model_config=model_config)
+chat_eval = ChatEvaluator(model_config=mock_model_config)
chat_eval._non_rag_evaluators = []
chat_eval._rag_evaluators = []

with pytest.raises(ValueError) as e:
chat_eval(conversation=conversation)
assert str(e.value) == "Expected role user but got assistant. Turn number: 3"

-def test_conversation_validation_invalid_citations(self, model_config):
+def test_conversation_validation_invalid_citations(self, mock_model_config):
conversation = [
{"role": "user", "content": "question 1"},
{"role": "assistant", "content": "answer 1", "context": {"citations": "invalid"}},
]

-chat_eval = ChatEvaluator(model_config=model_config)
+chat_eval = ChatEvaluator(model_config=mock_model_config)
chat_eval._non_rag_evaluators = []
chat_eval._rag_evaluators = []

with pytest.raises(ValueError) as e:
chat_eval(conversation=conversation)
assert str(e.value) == "'citations' in context must be a list. Turn number: 2"

-def test_per_turn_results_aggregation(self, model_config):
-chat_eval = ChatEvaluator(model_config=model_config)
+def test_per_turn_results_aggregation(self, mock_model_config):
+chat_eval = ChatEvaluator(model_config=mock_model_config)

per_turn_results = [
{
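Note: the validations exercised above require each turn to carry 'role' and 'content', roles to alternate between user and assistant, and 'citations' (when a context is supplied) to be a list. Below is a sketch of a conversation that satisfies those checks; the keys inside the citation entry are illustrative placeholders, not something these tests mandate.

```python
# Hypothetical conversation that passes the validations tested above; the
# fields inside the citation entry ("id", "content") are illustrative only.
valid_conversation = [
    {"role": "user", "content": "question 1"},
    {
        "role": "assistant",
        "content": "answer 1",
        "context": {"citations": [{"id": "doc_1", "content": "supporting text"}]},
    },
]
```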
18 changes: 9 additions & 9 deletions src/promptflow-evals/tests/evals/unittests/test_evaluate.py
@@ -19,38 +19,38 @@ def missing_columns_jsonl_file():
return os.path.join(data_path, "missing_columns_evaluate_test_data.jsonl")


@pytest.mark.usefixtures("model_config")
@pytest.mark.usefixtures("mock_model_config")
@pytest.mark.unittest
class TestEvaluate:
-def test_evaluate_missing_data(self, model_config):
+def test_evaluate_missing_data(self, mock_model_config):
with pytest.raises(ValueError) as exc_info:
-evaluate(evaluators={"g": GroundednessEvaluator(model_config=model_config)})
+evaluate(evaluators={"g": GroundednessEvaluator(model_config=mock_model_config)})

assert "data must be provided for evaluation." in exc_info.value.args[0]

-def test_evaluate_evaluators_not_a_dict(self, model_config):
+def test_evaluate_evaluators_not_a_dict(self, mock_model_config):
with pytest.raises(ValueError) as exc_info:
evaluate(
data="data",
-evaluators=[GroundednessEvaluator(model_config=model_config)],
+evaluators=[GroundednessEvaluator(model_config=mock_model_config)],
)

assert "evaluators must be a dictionary." in exc_info.value.args[0]

-def test_evaluate_invalid_data(self, model_config):
+def test_evaluate_invalid_data(self, mock_model_config):
with pytest.raises(ValueError) as exc_info:
evaluate(
data=123,
evaluators={"g": GroundednessEvaluator(model_config=model_config)},
evaluators={"g": GroundednessEvaluator(model_config=mock_model_config)},
)

assert "data must be a string." in exc_info.value.args[0]

-def test_evaluate_invalid_jsonl_data(self, model_config, invalid_jsonl_file):
+def test_evaluate_invalid_jsonl_data(self, mock_model_config, invalid_jsonl_file):
with pytest.raises(ValueError) as exc_info:
evaluate(
data=invalid_jsonl_file,
evaluators={"g": GroundednessEvaluator(model_config=model_config)},
evaluators={"g": GroundednessEvaluator(model_config=mock_model_config)},
)

assert "Failed to load data from " in exc_info.value.args[0]
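Note: taken together, these tests pin down the input contract of `evaluate`: `data` is required and must be a string pointing at a loadable JSONL file, and `evaluators` must be a dictionary. Below is a sketch of a call that satisfies that contract; the import path and file name are assumptions for illustration rather than values taken from this diff.

```python
# Sketch only: a call shape that satisfies the validations tested above.
from promptflow.evals.evaluate import evaluate  # import path assumed
from promptflow.evals.evaluators import GroundednessEvaluator


def run_groundedness_eval(model_config, data_path="data/evaluate_test_data.jsonl"):
    # data must be a string path to a JSONL file; evaluators must be a dict.
    return evaluate(
        data=data_path,
        evaluators={"g": GroundednessEvaluator(model_config=model_config)},
    )
```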
