@@ -6,15 +6,15 @@
 
 import os
 from pathlib import Path
-from typing import Optional
 import pandas as pd
 from ragas import evaluate, EvaluationDataset, SingleTurnSample
 from ragas.llms import LangchainLLMWrapper
 from langchain_openai.chat_models import ChatOpenAI
-from ragas.metrics import answer_relevancy, ContextRelevance, SemanticSimilarity, context_precision
+from ragas.metrics import AnswerAccuracy, SemanticSimilarity, FactualCorrectness
 from ragas.embeddings import LangchainEmbeddingsWrapper
 from langchain_openai import OpenAIEmbeddings
 from dotenv import load_dotenv
+from .ragas_utils import load_jsonl_data
 
 
 # Find the project root (where .env is located)
@@ -67,7 +67,6 @@ def create_ragas_dataset(data):
         # Create a sample using the RAGAS SingleTurnSample class
         eval_sample = SingleTurnSample(
             user_input=sample.get("user_input", ""),
-            retrieved_contexts=[context for context in sample.get("reference_contexts", []) if context],
             response=sample.get("response", ""),
             reference=reference,  # Use either provided reference or first context
         )
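Note: with `retrieved_contexts` dropped, each sample carries only the question, the model response, and a reference answer, which is all the reference-based metrics kept by this commit need (none of them score retrieval). A minimal sketch of the resulting sample shape; the field names come from this diff and the ragas `SingleTurnSample` API, while the string values are made up:

```python
from ragas import SingleTurnSample

# Reference-only sample: no retrieval contexts are attached anymore.
sample = SingleTurnSample(
    user_input="What does RAGAS measure?",            # hypothetical question
    response="RAGAS scores RAG pipeline outputs.",    # hypothetical model answer
    reference="RAGAS evaluates the quality of retrieval-augmented generation output.",
)
```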
@@ -119,23 +118,17 @@ def create_ragas_llm():
     return LangchainLLMWrapper(chat_model), ragas_embeddings
 
 
-async def evaluate_with_ragas(
-    jsonl_path: str, output_json_path: Optional[str] = None, skip_chart: bool = False
-) -> pd.DataFrame:
+async def evaluate_with_ragas(jsonl_path: str) -> pd.DataFrame:
     """
     Evaluate responses using RAGAS metrics
 
     Args:
         jsonl_path: Path to the input JSONL file with responses
-        output_json_path: Path to save the JSON results
-        skip_chart: Whether to skip generating the bar chart
 
     Returns:
         DataFrame with evaluation results
     """
     # Import locally to avoid circular imports
-    from .ragas_utils import load_jsonl_data, save_results_to_json
-    from .ragas_visualization import generate_bar_chart
 
     print("Setting up RAGAS evaluation...")
     print(f"Loading data from {jsonl_path}...")
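With `output_json_path` and `skip_chart` removed, the function now only evaluates and returns a DataFrame. A hypothetical caller under the new signature; the input file name is illustrative, the import path is an assumption, and the coroutine must be awaited:

```python
import asyncio

# from .ragas_evaluation import evaluate_with_ragas  # module path is an assumption

async def main():
    results_df = await evaluate_with_ragas("responses.jsonl")  # hypothetical input file
    print(results_df.head())

asyncio.run(main())
```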
@@ -150,28 +143,22 @@ async def evaluate_with_ragas(
     dataset, samples, processed_data = create_ragas_dataset(data)
 
     # Define metrics to use for evaluation
-    print(
-        "Configuring default RAGAS metrics: semantic_similarity, "
-        "answer_relevancy, context_relevance, context_precision"
-    )
+    print("Configuring default RAGAS metrics: semantic_similarity, factual_correctness, answer_accuracy")
     metrics = [
         SemanticSimilarity(),
-        answer_relevancy,
-        context_precision,
-        ContextRelevance(llm=llm),
+        FactualCorrectness(llm=llm),
+        AnswerAccuracy(llm=llm),
     ]
 
     # Run the evaluation
     print("Running RAGAS evaluation (this may take a while)...")
     results = evaluate(dataset=dataset, metrics=metrics, llm=llm)
-
     try:
         print("Processing evaluation results including llm_usage if present...")
         # Define expected metrics for alignment and output naming
         expected_metrics = [
-            ("nv_context_relevance", "recontext_relevance"),
-            ("context_precision", "context_precision"),
-            ("answer_relevancy", "answer_relevancy"),
+            ("factual_correctness(mode=f1)", "factual_correctness"),
+            ("nv_accuracy", "answer_accuracy"),
             ("semantic_similarity", "semantic_similarity"),
         ]
 
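The raw names on the left are the column names the metrics emit in the results table: `FactualCorrectness` defaults to F1 mode, hence `factual_correctness(mode=f1)`, and `AnswerAccuracy` reports as `nv_accuracy` (both names appear verbatim in this diff). The tuples map them to friendlier output columns. A sketch of what the alignment loop effectively does, assuming the results are first materialized with `results.to_pandas()`:

```python
# Equivalent column rename, applied only to columns that actually exist.
df = results.to_pandas()
rename_map = dict(expected_metrics)  # raw metric name -> output column name
df = df.rename(columns={raw: out for raw, out in rename_map.items() if raw in df.columns})
```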
@@ -198,7 +185,8 @@ async def evaluate_with_ragas(
                 row_dict[mapped] = df.loc[idx, raw]
             # Attach llm_usage if supplied in original input sample
             if "llm_usage" in processed_data[idx]:
-                row_dict["llm_usage"] = processed_data[idx]["llm_usage"]
+                for key, val in processed_data[idx]["llm_usage"].items():
+                    row_dict[key] = val
             rows.append(row_dict)
 
         results_df = pd.DataFrame(rows)
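Flattening `llm_usage` spreads each usage key into its own scalar column rather than storing one nested dict, which keeps the DataFrame flat for export and aggregation. Before and after, for a hypothetical usage record:

```python
usage = {"prompt_tokens": 812, "completion_tokens": 164}  # hypothetical usage keys

row_old = {"semantic_similarity": 0.91, "llm_usage": usage}  # one dict-valued column
row_new = {"semantic_similarity": 0.91, **usage}             # one column per usage key
```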
@@ -232,20 +220,6 @@ async def evaluate_with_ragas(
             print(f"Could not process RAGAS results with llm_usage: {e}")
             raise
 
-        # Save results and generate visualization
-        if output_json_path:
-            # Save results to JSON
-            save_results_to_json(results_df, output_json_path)
-
-            # Generate visualization if not disabled
-            if not skip_chart:
-                try:
-                    chart_path = generate_bar_chart(output_json_path)
-                    if chart_path:
-                        print(f"Chart generated: {chart_path}")
-                except Exception as e:
-                    print(f"Chart generation failed: {e}")
-
         return results_df
 
     except Exception as e:
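Persistence and charting are no longer the evaluator's job; callers save results themselves. A hypothetical replacement for the removed block using plain pandas, with illustrative file names:

```python
import asyncio

results_df = asyncio.run(evaluate_with_ragas("responses.jsonl"))
results_df.to_json("ragas_results.json", orient="records", indent=2)
```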