
Commit 3218fb1

Osai 96/evaluate improve new qa metrics (#40)
* Change metrics on Ragas
* OSAI-96/evaluate-improve-new-metrics
* run ruff
* Break metrics in lines
* new changes with ruff
1 parent c06c92c commit 3218fb1

File tree

2 files changed: +13 -9 lines changed

backend/tests/Ragas/utils/modules/ragas_evaluation.py

Lines changed: 13 additions & 9 deletions
@@ -10,9 +10,8 @@
 import pandas as pd
 from ragas import evaluate, EvaluationDataset, SingleTurnSample
 from ragas.llms import LangchainLLMWrapper
-from langchain_openai import ChatOpenAI
-from ragas.metrics import FactualCorrectness, SemanticSimilarity
-from ragas.metrics._nv_metrics import AnswerAccuracy
+from langchain_openai.chat_models import ChatOpenAI
+from ragas.metrics import answer_relevancy, ContextRelevance, SemanticSimilarity, context_precision
 from ragas.embeddings import LangchainEmbeddingsWrapper
 from langchain_openai import OpenAIEmbeddings
 from dotenv import load_dotenv

@@ -151,11 +150,15 @@ async def evaluate_with_ragas(
     dataset, samples, processed_data = create_ragas_dataset(data)

     # Define metrics to use for evaluation
-    print("Configuring default RAGAS metrics: factual_correctness, semantic_similarity, answer_accuracy")
+    print(
+        "Configuring default RAGAS metrics: semantic_similarity, "
+        "answer_relevancy, context_relevance, context_precision"
+    )
     metrics = [
-        FactualCorrectness(llm=llm),
-        SemanticSimilarity(embeddings=embeddings_wrapper),
-        AnswerAccuracy(llm=llm),
+        SemanticSimilarity(),
+        answer_relevancy,
+        context_precision,
+        ContextRelevance(llm=llm),
     ]

     # Run the evaluation

@@ -166,9 +169,10 @@ async def evaluate_with_ragas(
     print("Processing evaluation results including llm_usage if present...")
     # Define expected metrics for alignment and output naming
     expected_metrics = [
-        ("factual_correctness(mode=f1)", "factual_correctness"),
+        ("nv_context_relevance", "recontext_relevance"),
+        ("context_precision", "context_precision"),
+        ("answer_relevancy", "answer_relevancy"),
         ("semantic_similarity", "semantic_similarity"),
-        ("nv_accuracy", "answer_accuracy"),
     ]

     df = results.to_pandas()
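
For context, a minimal sketch of how the reconfigured metric set might be exercised end to end. This is an illustration under assumptions, not code from this repo: the model name "gpt-4o-mini" and the sample contents are placeholders, and an OPENAI_API_KEY is assumed to be available so the LangChain wrappers can call OpenAI.

# Minimal sketch: run the commit's four metrics over one placeholder sample.
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from ragas import evaluate, EvaluationDataset, SingleTurnSample
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import answer_relevancy, ContextRelevance, SemanticSimilarity, context_precision

# Model name is an assumption, not taken from the diff.
llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# Placeholder sample; real data comes from create_ragas_dataset(data).
sample = SingleTurnSample(
    user_input="What does the endpoint return on success?",
    retrieved_contexts=["On success the endpoint returns HTTP 200 with a JSON body."],
    response="It returns HTTP 200 with a JSON payload.",
    reference="A 200 status code together with a JSON body.",
)
dataset = EvaluationDataset(samples=[sample])

metrics = [
    SemanticSimilarity(),       # embeddings injected by evaluate() below
    answer_relevancy,           # prebuilt instance exported by ragas.metrics
    context_precision,          # prebuilt instance exported by ragas.metrics
    ContextRelevance(llm=llm),  # NVIDIA metric takes its LLM explicitly
]

results = evaluate(dataset=dataset, metrics=metrics, llm=llm, embeddings=embeddings)
print(results.to_pandas())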

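And a hypothetical helper, showing how the (raw column, output name) pairs in expected_metrics could be applied to the DataFrame that results.to_pandas() returns. The function name and the NaN backfill are illustrative assumptions, not code from this repo; the pairs themselves are those configured in the commit.

import math

import pandas as pd

# Pairs as configured in this commit: (column emitted by RAGAS, output name).
expected_metrics = [
    ("nv_context_relevance", "recontext_relevance"),
    ("context_precision", "context_precision"),
    ("answer_relevancy", "answer_relevancy"),
    ("semantic_similarity", "semantic_similarity"),
]

def align_metric_columns(df: pd.DataFrame) -> pd.DataFrame:
    # Rename each raw metric column to its output name; backfill NaN for
    # any expected column the evaluation did not produce.
    for raw_name, output_name in expected_metrics:
        if raw_name in df.columns:
            df = df.rename(columns={raw_name: output_name})
        else:
            df[output_name] = math.nan
    return df

# Usage: df = align_metric_columns(results.to_pandas())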