Commit b7ebbd7

new metrics
1 parent 4026c88 commit b7ebbd7

File tree

1 file changed: +11 additions, -13 deletions

backend/tests/Ragas/utils/modules/ragas_evaluation.py

Lines changed: 11 additions & 13 deletions

@@ -10,7 +10,8 @@
 from ragas import evaluate, EvaluationDataset, SingleTurnSample
 from ragas.llms import LangchainLLMWrapper
 from langchain_openai.chat_models import ChatOpenAI
-from ragas.metrics import answer_relevancy, ContextRelevance, SemanticSimilarity, context_precision
+from ragas.metrics import AnswerAccuracy , SemanticSimilarity, FactualCorrectness
+
 from ragas.embeddings import LangchainEmbeddingsWrapper
 from langchain_openai import OpenAIEmbeddings
 from dotenv import load_dotenv

@@ -67,7 +68,6 @@ def create_ragas_dataset(data):
         # Create a sample using the RAGAS SingleTurnSample class
         eval_sample = SingleTurnSample(
             user_input=sample.get("user_input", ""),
-            retrieved_contexts=[context for context in sample.get("reference_contexts", []) if context],
             response=sample.get("response", ""),
             reference=reference,  # Use either provided reference or first context
         )

@@ -145,33 +145,31 @@ async def evaluate_with_ragas(jsonl_path: str) -> pd.DataFrame:

     # Define metrics to use for evaluation
     print(
-        "Configuring default RAGAS metrics: semantic_similarity, "
-        "answer_relevancy, context_relevance, context_precision"
+        "Configuring default RAGAS metrics: semantic_similarity,factual_correctness, answer_accuracy"
     )
     metrics = [
         SemanticSimilarity(),
-        answer_relevancy,
-        context_precision,
-        ContextRelevance(llm=llm),
+        FactualCorrectness(llm=llm),
+        AnswerAccuracy(llm=llm),
     ]

     # Run the evaluation
     print("Running RAGAS evaluation (this may take a while)...")
     results = evaluate(dataset=dataset, metrics=metrics, llm=llm)
-
     try:
         print("Processing evaluation results including llm_usage if present...")
         # Define expected metrics for alignment and output naming
         expected_metrics = [
-            ("nv_context_relevance", "recontext_relevance"),
-            ("context_precision", "context_precision"),
-            ("answer_relevancy", "answer_relevancy"),
-            ("semantic_similarity", "semantic_similarity"),
-        ]
+            ("factual_correctness(mode=f1)", "factual_correctness"),
+            ("nv_accuracy", "answer_accuracy"),
+            ("semantic_similarity", "semantic_similarity"),
+        ]
+

         df = results.to_pandas()
         available_columns = list(df.columns)
         print(f"Results DataFrame columns: {available_columns}")
+

         # Verify required columns
         missing = [raw for raw, _ in expected_metrics if raw not in available_columns]
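
For reference, a minimal standalone sketch of how the metric set introduced by this commit can be wired up and run. It is not part of the commit: the model name, the sample question/answer strings, and the standalone load_dotenv call are illustrative assumptions, while every class, function, and result column name is taken from the diff above.

# Hedged sketch of the new metric configuration; sample strings and the
# model name are invented for illustration, not taken from the repository.
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI
from ragas import evaluate, EvaluationDataset, SingleTurnSample
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import AnswerAccuracy, FactualCorrectness, SemanticSimilarity

load_dotenv()  # expects OPENAI_API_KEY in the environment

# Wrap the LangChain clients so RAGAS can drive them (model name assumed).
llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# One evaluation sample: response compared against a reference, with no
# retrieved contexts, mirroring the SingleTurnSample built in create_ragas_dataset.
sample = SingleTurnSample(
    user_input="What does the ragas_evaluation module do?",
    response="It scores RAG answers with RAGAS metrics.",
    reference="The module evaluates RAG responses using RAGAS metrics.",
)
dataset = EvaluationDataset(samples=[sample])

# The three metrics configured by this commit.
metrics = [
    SemanticSimilarity(),         # embedding-based similarity to the reference
    FactualCorrectness(llm=llm),  # reported as factual_correctness(mode=f1)
    AnswerAccuracy(llm=llm),      # reported as nv_accuracy
]

results = evaluate(dataset=dataset, metrics=metrics, llm=llm, embeddings=embeddings)
df = results.to_pandas()

# Raw column names matching the updated expected_metrics mapping.
print(df[["factual_correctness(mode=f1)", "nv_accuracy", "semantic_similarity"]])

One consequence visible in the diff: none of the remaining metrics consume retrieved contexts, which is presumably why the retrieved_contexts argument was dropped from the SingleTurnSample construction, leaving only response-versus-reference comparisons.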
