
Commit

fix the test pipelines
ninghu committed Apr 16, 2024
1 parent a2680c9 commit 752b7c2
Showing 4 changed files with 31 additions and 22 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/promptflow-evals-e2e-test.yml
@@ -75,8 +75,7 @@ jobs:
run: poetry install
working-directory: ${{ env.RECORD_DIRECTORY }}
- name: generate end-to-end test config from secret
-# TODO: replace with evals secret
-run: echo '${{ secrets.PF_TRACING_E2E_TEST_CONFIG }}' >> connections.json
+run: echo '${{ secrets.PF_EVALS_E2E_TEST_CONFIG }}' >> connections.json
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: run e2e tests
run: poetry run pytest -m e2etest --cov=promptflow --cov-config=pyproject.toml --cov-report=term --cov-report=html --cov-report=xml
10 changes: 10 additions & 0 deletions src/promptflow-evals/tests/evals/conftest.py
@@ -44,6 +44,16 @@ def pytest_configure():
pytest.is_in_ci_pipeline = is_in_ci_pipeline()


@pytest.fixture
def mock_model_config() -> dict:
return AzureOpenAIModelConfiguration(
azure_endpoint="aoai-api-endpoint",
api_key="aoai-api-key",
api_version="2023-07-01-preview",
azure_deployment="aoai-deployment",
)


@pytest.fixture
def model_config() -> dict:
conn_name = "azure_openai_model_config"
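Note: the `mock_model_config` fixture added above lets the unit tests construct evaluators from placeholder Azure OpenAI settings instead of the real `azure_openai_model_config` connection. Below is a minimal sketch of how a unit test can consume it, mirroring the pattern used in the changed test files; the test name and final assertion are illustrative, not part of this commit.

```python
# Sketch only: a hypothetical unit test consuming the mock_model_config fixture.
import pytest

from promptflow.evals.evaluators import ChatEvaluator


@pytest.mark.unittest
def test_chat_evaluator_accepts_mock_config(mock_model_config):
    # Build the evaluator from placeholder settings; clearing the sub-evaluator
    # lists keeps the test from ever calling an Azure OpenAI endpoint.
    chat_eval = ChatEvaluator(model_config=mock_model_config)
    chat_eval._non_rag_evaluators = []
    chat_eval._rag_evaluators = []
    assert chat_eval is not None
```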
22 changes: 11 additions & 11 deletions src/promptflow-evals/tests/evals/unittests/test_chat_evaluator.py
@@ -3,10 +3,10 @@
from promptflow.evals.evaluators import ChatEvaluator


@pytest.mark.usefixtures("model_config")
@pytest.mark.usefixtures("mock_model_config")
@pytest.mark.unittest
class TestChatEvaluator:
-def test_conversation_validation_normal(self, model_config):
+def test_conversation_validation_normal(self, mock_model_config):
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{
@@ -33,57 +33,57 @@ def test_conversation_validation_normal(self, model_config):
},
]

-chat_eval = ChatEvaluator(model_config=model_config)
+chat_eval = ChatEvaluator(model_config=mock_model_config)
chat_eval._non_rag_evaluators = []
chat_eval._rag_evaluators = []

chat_eval(conversation=conversation)

-def test_conversation_validation_missing_role(self, model_config):
+def test_conversation_validation_missing_role(self, mock_model_config):
conversation = [
{"role": "user", "content": "question 1"},
{"content": "answer 1"},
]

-chat_eval = ChatEvaluator(model_config=model_config)
+chat_eval = ChatEvaluator(model_config=mock_model_config)
chat_eval._non_rag_evaluators = []
chat_eval._rag_evaluators = []

with pytest.raises(ValueError) as e:
chat_eval(conversation=conversation)
assert str(e.value) == "Each turn in 'conversation' must have 'role' and 'content' keys. Turn number: 2"

-def test_conversation_validation_question_answer_not_paired(self, model_config):
+def test_conversation_validation_question_answer_not_paired(self, mock_model_config):
conversation = [
{"role": "user", "content": "question 1"},
{"role": "assistant", "content": "answer 1"},
{"role": "assistant", "content": "answer 2"},
]

-chat_eval = ChatEvaluator(model_config=model_config)
+chat_eval = ChatEvaluator(model_config=mock_model_config)
chat_eval._non_rag_evaluators = []
chat_eval._rag_evaluators = []

with pytest.raises(ValueError) as e:
chat_eval(conversation=conversation)
assert str(e.value) == "Expected role user but got assistant. Turn number: 3"

-def test_conversation_validation_invalid_citations(self, model_config):
+def test_conversation_validation_invalid_citations(self, mock_model_config):
conversation = [
{"role": "user", "content": "question 1"},
{"role": "assistant", "content": "answer 1", "context": {"citations": "invalid"}},
]

-chat_eval = ChatEvaluator(model_config=model_config)
+chat_eval = ChatEvaluator(model_config=mock_model_config)
chat_eval._non_rag_evaluators = []
chat_eval._rag_evaluators = []

with pytest.raises(ValueError) as e:
chat_eval(conversation=conversation)
assert str(e.value) == "'citations' in context must be a list. Turn number: 2"

-def test_per_turn_results_aggregation(self, model_config):
-chat_eval = ChatEvaluator(model_config=model_config)
+def test_per_turn_results_aggregation(self, mock_model_config):
+chat_eval = ChatEvaluator(model_config=mock_model_config)

per_turn_results = [
{
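Note: the validations exercised above require each turn to carry 'role' and 'content', roles to alternate between user and assistant, and 'citations' (when a context is supplied) to be a list. Below is a sketch of a conversation that satisfies those checks; the keys inside the citation entry are illustrative placeholders, not something these tests mandate.

```python
# Hypothetical conversation that passes the validations tested above; the
# fields inside the citation entry ("id", "content") are illustrative only.
valid_conversation = [
    {"role": "user", "content": "question 1"},
    {
        "role": "assistant",
        "content": "answer 1",
        "context": {"citations": [{"id": "doc_1", "content": "supporting text"}]},
    },
]
```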
18 changes: 9 additions & 9 deletions src/promptflow-evals/tests/evals/unittests/test_evaluate.py
@@ -19,38 +19,38 @@ def missing_columns_jsonl_file():
return os.path.join(data_path, "missing_columns_evaluate_test_data.jsonl")


@pytest.mark.usefixtures("model_config")
@pytest.mark.usefixtures("mock_model_config")
@pytest.mark.unittest
class TestEvaluate:
-def test_evaluate_missing_data(self, model_config):
+def test_evaluate_missing_data(self, mock_model_config):
with pytest.raises(ValueError) as exc_info:
-evaluate(evaluators={"g": GroundednessEvaluator(model_config=model_config)})
+evaluate(evaluators={"g": GroundednessEvaluator(model_config=mock_model_config)})

assert "data must be provided for evaluation." in exc_info.value.args[0]

-def test_evaluate_evaluators_not_a_dict(self, model_config):
+def test_evaluate_evaluators_not_a_dict(self, mock_model_config):
with pytest.raises(ValueError) as exc_info:
evaluate(
data="data",
-evaluators=[GroundednessEvaluator(model_config=model_config)],
+evaluators=[GroundednessEvaluator(model_config=mock_model_config)],
)

assert "evaluators must be a dictionary." in exc_info.value.args[0]

-def test_evaluate_invalid_data(self, model_config):
+def test_evaluate_invalid_data(self, mock_model_config):
with pytest.raises(ValueError) as exc_info:
evaluate(
data=123,
evaluators={"g": GroundednessEvaluator(model_config=model_config)},
evaluators={"g": GroundednessEvaluator(model_config=mock_model_config)},
)

assert "data must be a string." in exc_info.value.args[0]

-def test_evaluate_invalid_jsonl_data(self, model_config, invalid_jsonl_file):
+def test_evaluate_invalid_jsonl_data(self, mock_model_config, invalid_jsonl_file):
with pytest.raises(ValueError) as exc_info:
evaluate(
data=invalid_jsonl_file,
evaluators={"g": GroundednessEvaluator(model_config=model_config)},
evaluators={"g": GroundednessEvaluator(model_config=mock_model_config)},
)

assert "Failed to load data from " in exc_info.value.args[0]
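Note: taken together, these tests pin down the input contract of `evaluate`: `data` is required and must be a string pointing at a loadable JSONL file, and `evaluators` must be a dictionary. Below is a sketch of a call that satisfies that contract; the import path and file name are assumptions for illustration rather than values taken from this diff.

```python
# Sketch only: a call shape that satisfies the validations tested above.
from promptflow.evals.evaluate import evaluate  # import path assumed
from promptflow.evals.evaluators import GroundednessEvaluator


def run_groundedness_eval(model_config, data_path="data/evaluate_test_data.jsonl"):
    # data must be a string path to a JSONL file; evaluators must be a dict.
    return evaluate(
        data=data_path,
        evaluators={"g": GroundednessEvaluator(model_config=model_config)},
    )
```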
