Commit f14b54a

Restructure RAGAS evaluation;
1 parent 3218fb1 commit f14b54a

3 files changed: 22 additions, 47 deletions

3 files changed

+22
-47
lines changed

backend/tests/Ragas/utils/README.md

Lines changed: 3 additions & 7 deletions
````diff
@@ -68,13 +68,10 @@ python enhanced_run_evaluation_pipeline.py /path/to/your/document.pdf 3
 After running the enhanced pipeline which produces the `ragas_evaluation_with_responses.jsonl` file, you must run the RAGAS evaluation script:
 
 ```bash
-python ragas_evaluate.py
+python ragas_evaluate.py --llm 'name-of-llm'
 
 # With custom input/output paths
-python ragas_evaluate.py --input path/to/input.jsonl --output path/to/output.json
-
-# Skip chart generation
-python ragas_evaluate.py --no-chart
+python ragas_evaluate.py --llm 'name-of-llm' --input path/to/input.jsonl --output path/to/output.csv
 ```
 
 ## Environment Variables
@@ -116,8 +113,7 @@ All output files are stored in `../files/`:
 
 - `ragas_evaluation_dataset.jsonl`: Initial questions and references
 - `ragas_evaluation_with_responses.jsonl`: Questions with API responses
-- `ragas_eval_result.json`: Evaluation metrics as configured in RAGAS_METRICS (default: factual_correctness, semantic_similarity, answer_accuracy)
-- `ragas_eval_result_chart.png`: Visualization of evaluation results
+- `ragas_eval_result.csv`: CSV of evaluation metrics, which is appended to on each run. Includes a column for the LLM name to support retrospective graph generation.
 
 ## Troubleshooting
 
````
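With the chart step removed from the pipeline, the appended `ragas_eval_result.csv` (and its LLM-name column) is what retrospective graphs would be built from. Below is a minimal plotting sketch, assuming the default metric names listed above (`factual_correctness`, `semantic_similarity`, `answer_accuracy`) appear as columns, that the column for the model is named `llm`, and that the script runs from the Ragas root; the output filename is illustrative:

```python
# Sketch only: rebuild a per-LLM comparison chart from the accumulated CSV.
# Assumes the default metric columns and the "llm" column written by
# ragas_evaluate.py; adjust names if your RAGAS_METRICS configuration differs.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("files/ragas_eval_result.csv")
metrics = ["factual_correctness", "semantic_similarity", "answer_accuracy"]

# Mean score per metric for every LLM evaluated so far
summary = df.groupby("llm")[metrics].mean()
summary.plot(kind="bar", rot=0, figsize=(8, 4), title="RAGAS metrics by LLM")
plt.tight_layout()
plt.savefig("files/ragas_eval_result_by_llm.png")  # illustrative filename
```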

backend/tests/Ragas/utils/modules/ragas_evaluation.py

Lines changed: 5 additions & 23 deletions
```diff
@@ -6,7 +6,6 @@
 
 import os
 from pathlib import Path
-from typing import Optional
 import pandas as pd
 from ragas import evaluate, EvaluationDataset, SingleTurnSample
 from ragas.llms import LangchainLLMWrapper
@@ -15,6 +14,8 @@
 from ragas.embeddings import LangchainEmbeddingsWrapper
 from langchain_openai import OpenAIEmbeddings
 from dotenv import load_dotenv
+from .ragas_utils import load_jsonl_data
+
 
 
 # Find the project root (where .env is located)
@@ -119,23 +120,17 @@ def create_ragas_llm():
     return LangchainLLMWrapper(chat_model), ragas_embeddings
 
 
-async def evaluate_with_ragas(
-    jsonl_path: str, output_json_path: Optional[str] = None, skip_chart: bool = False
-) -> pd.DataFrame:
+async def evaluate_with_ragas(jsonl_path: str) -> pd.DataFrame:
     """
     Evaluate responses using RAGAS metrics
 
     Args:
         jsonl_path: Path to the input JSONL file with responses
-        output_json_path: Path to save the JSON results
-        skip_chart: Whether to skip generating the bar chart
 
     Returns:
         DataFrame with evaluation results
     """
     # Import locally to avoid circular imports
-    from .ragas_utils import load_jsonl_data, save_results_to_json
-    from .ragas_visualization import generate_bar_chart
 
     print("Setting up RAGAS evaluation...")
     print(f"Loading data from {jsonl_path}...")
@@ -198,7 +193,8 @@ async def evaluate_with_ragas(
                     row_dict[mapped] = df.loc[idx, raw]
                 # Attach llm_usage if supplied in original input sample
                 if "llm_usage" in processed_data[idx]:
-                    row_dict["llm_usage"] = processed_data[idx]["llm_usage"]
+                    for key, val in processed_data[idx]["llm_usage"].items():
+                        row_dict[key] = val
                 rows.append(row_dict)
 
             results_df = pd.DataFrame(rows)
@@ -232,20 +228,6 @@ async def evaluate_with_ragas(
             print(f"Could not process RAGAS results with llm_usage: {e}")
             raise
 
-        # Save results and generate visualization
-        if output_json_path:
-            # Save results to JSON
-            save_results_to_json(results_df, output_json_path)
-
-            # Generate visualization if not disabled
-            if not skip_chart:
-                try:
-                    chart_path = generate_bar_chart(output_json_path)
-                    if chart_path:
-                        print(f"Chart generated: {chart_path}")
-                except Exception as e:
-                    print(f"Chart generation failed: {e}")
-
         return results_df
 
     except Exception as e:
```
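The `llm_usage` change above replaces a single nested-dict column with one flat column per usage key, which serialises cleanly into the appended CSV. A small sketch of the effect, with hypothetical usage keys (`prompt_tokens`, `completion_tokens`) standing in for whatever the pipeline actually records:

```python
# Sketch of the flattening behaviour: the usage keys below are hypothetical
# examples, not necessarily what the pipeline stores in "llm_usage".
import pandas as pd

sample = {"llm_usage": {"prompt_tokens": 512, "completion_tokens": 128}}
row_dict = {"factual_correctness": 0.91}

# Old behaviour: one column holding the whole dict (awkward in CSV)
# row_dict["llm_usage"] = sample["llm_usage"]

# New behaviour: one flat column per usage key
for key, val in sample["llm_usage"].items():
    row_dict[key] = val

print(pd.DataFrame([row_dict]))
#    factual_correctness  prompt_tokens  completion_tokens
# 0                 0.91            512                128
```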

backend/tests/Ragas/utils/ragas_evaluate.py

Lines changed: 14 additions & 17 deletions
```diff
@@ -9,33 +9,30 @@
 import sys
 import argparse
 import asyncio
-import pandas as pd
 from modules.ragas_evaluation import evaluate_with_ragas
 
 
-async def run_evaluation(input_path: str, output_path: str, skip_chart: bool = False) -> pd.DataFrame:
+async def run_evaluation(input_path: str, output_path: str, llm: str) -> None:
     """
     Run the RAGAS evaluation process end-to-end
 
     Args:
         input_path: Path to input JSONL file with responses
         output_path: Path to save JSON output results
-        skip_chart: Whether to skip chart generation
-
-    Returns:
-        DataFrame with evaluation results
+        llm: The LLM model to use for evaluation
     """
 
     # Run RAGAS evaluation
     print(f"Running RAGAS evaluation on {input_path}...")
-    results_df = await evaluate_with_ragas(input_path, output_path, skip_chart)
+    results_df = await evaluate_with_ragas(input_path)
+    results_df["llm"] = llm
+    print("RAGAS evaluation completed.")
+    print(f"Appending results to CSV file... {output_path}")
 
-    print(f"Evaluation complete! Results saved to {output_path}")
-    if not skip_chart:
-        chart_path = output_path.replace(".json", "_chart.png")
-        print(f"Chart saved to {chart_path}")
+    file_exists = os.path.isfile(output_path)
+    results_df.to_csv(output_path, mode='a', header=not file_exists, index=False)
 
-    return results_df
+    print("Results appended to CSV file.")
 
 
 async def main():
@@ -44,12 +41,13 @@ async def main():
     # Set up default file paths
     base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # Navigate to Ragas root
     default_input = os.path.normpath(os.path.join(base_dir, "files/ragas_evaluation_with_responses.jsonl"))
-    default_output = os.path.normpath(os.path.join(base_dir, "files/ragas_eval_result.json"))
+    default_output = os.path.normpath(os.path.join(base_dir, "files/ragas_eval_result.csv"))
 
     # Parse command line arguments
     parser = argparse.ArgumentParser(description="Evaluate responses using RAGAS metrics")
+    parser.add_argument("--llm", "-l", help="LLM model to use for evaluation")
     parser.add_argument("--input", "-i", dest="input_jsonl", help="Path to input JSONL file", default=default_input)
-    parser.add_argument("--output", "-o", help="Path to save JSON output", default=default_output)
+    parser.add_argument("--output", "-o", help="Path to save CSV output", default=default_output)
     parser.add_argument("--no-chart", action="store_true", help="Skip chart visualization")
     args = parser.parse_args()
 
@@ -60,12 +58,11 @@
 
     print(f"Input file: {args.input_jsonl}")
     print(f"Output file: {args.output}")
-    if args.no_chart:
-        print("Chart generation is disabled")
+    print(f"LLM model: {args.llm}")
 
     # Run evaluation
     try:
-        await run_evaluation(args.input_jsonl, args.output, args.no_chart)
+        await run_evaluation(args.input_jsonl, args.output, args.llm)
     except Exception as e:
         print(f"Error during evaluation: {str(e)}")
         sys.exit(1)
```
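The append logic in `run_evaluation` writes the CSV header only when the file does not yet exist, so running the script once per LLM accumulates comparable rows in a single file. A minimal sketch of that idiom, using placeholder paths and model names rather than the script's real defaults:

```python
# Sketch of the header-once append idiom used above; the path and model
# names are placeholders, not the script's actual defaults.
import os
import pandas as pd

output_path = "demo_eval_result.csv"

for llm in ["model-a", "model-b"]:  # stand-ins for successive --llm values
    results_df = pd.DataFrame([{"answer_accuracy": 0.8, "llm": llm}])
    file_exists = os.path.isfile(output_path)
    results_df.to_csv(output_path, mode="a", header=not file_exists, index=False)

with open(output_path) as f:
    print(f.read())
# answer_accuracy,llm
# 0.8,model-a
# 0.8,model-b
```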
