Merged
Changes from 3 commits
10 changes: 3 additions & 7 deletions backend/tests/Ragas/utils/README.md
@@ -68,13 +68,10 @@ python enhanced_run_evaluation_pipeline.py /path/to/your/document.pdf 3
After running the enhanced pipeline which produces the `ragas_evaluation_with_responses.jsonl` file, you must run the RAGAS evaluation script:

```bash
python ragas_evaluate.py
python ragas_evaluate.py --llm 'name-of-llm'

# With custom input/output paths
python ragas_evaluate.py --input path/to/input.jsonl --output path/to/output.json

# Skip chart generation
python ragas_evaluate.py --no-chart
python ragas_evaluate.py --llm 'name-of-llm' --input path/to/input.jsonl --output path/to/output.csv
```

## Environment Variables
@@ -116,8 +113,7 @@ All output files are stored in `../files/`:

- `ragas_evaluation_dataset.jsonl`: Initial questions and references
- `ragas_evaluation_with_responses.jsonl`: Questions with API responses
- `ragas_eval_result.json`: Evaluation metrics as configured in RAGAS_METRICS (default: factual_correctness, semantic_similarity, answer_accuracy)
- `ragas_eval_result_chart.png`: Visualization of evaluation results
- `ragas_eval_result.csv`: CSV of evaluation metrics, which is appended to on each run. Includes a column for the LLM name to support retrospective graph generation.

## Troubleshooting

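Note on the README change above: because `ragas_eval_result.csv` now accumulates rows from every run and tags each row with the evaluating LLM, comparison charts can be produced retrospectively from the CSV rather than during evaluation. A minimal sketch of what that could look like, assuming the metric columns carry the default RAGAS_METRICS names (factual_correctness, semantic_similarity, answer_accuracy) and that matplotlib is available; this script is not part of the PR:

```python
import pandas as pd
import matplotlib.pyplot as plt

# Accumulated results, one block of rows appended per evaluation run
df = pd.read_csv("files/ragas_eval_result.csv")

# Assumed metric column names, taken from the default RAGAS_METRICS
metrics = ["factual_correctness", "semantic_similarity", "answer_accuracy"]

# Average each metric per LLM across all appended runs
summary = df.groupby("llm")[metrics].mean()

summary.plot(kind="bar")
plt.ylabel("Mean score")
plt.title("RAGAS metrics by LLM")
plt.tight_layout()
plt.savefig("files/ragas_eval_result_comparison.png")
```

Grouping on the `llm` column is what makes the cross-model comparison possible once several runs have been appended.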
27 changes: 4 additions & 23 deletions backend/tests/Ragas/utils/modules/ragas_evaluation.py
@@ -6,7 +6,6 @@

import os
from pathlib import Path
from typing import Optional
import pandas as pd
from ragas import evaluate, EvaluationDataset, SingleTurnSample
from ragas.llms import LangchainLLMWrapper
@@ -15,6 +14,7 @@
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
from .ragas_utils import load_jsonl_data


# Find the project root (where .env is located)
@@ -119,23 +119,17 @@ def create_ragas_llm():
return LangchainLLMWrapper(chat_model), ragas_embeddings


async def evaluate_with_ragas(
jsonl_path: str, output_json_path: Optional[str] = None, skip_chart: bool = False
) -> pd.DataFrame:
async def evaluate_with_ragas(jsonl_path: str) -> pd.DataFrame:
"""
Evaluate responses using RAGAS metrics

Args:
jsonl_path: Path to the input JSONL file with responses
output_json_path: Path to save the JSON results
skip_chart: Whether to skip generating the bar chart

Returns:
DataFrame with evaluation results
"""
# Import locally to avoid circular imports
from .ragas_utils import load_jsonl_data, save_results_to_json
from .ragas_visualization import generate_bar_chart

print("Setting up RAGAS evaluation...")
print(f"Loading data from {jsonl_path}...")
@@ -198,7 +192,8 @@ async def evaluate_with_ragas(
row_dict[mapped] = df.loc[idx, raw]
# Attach llm_usage if supplied in original input sample
if "llm_usage" in processed_data[idx]:
row_dict["llm_usage"] = processed_data[idx]["llm_usage"]
for key, val in processed_data[idx]["llm_usage"].items():
row_dict[key] = val
rows.append(row_dict)

results_df = pd.DataFrame(rows)
@@ -232,20 +227,6 @@ async def evaluate_with_ragas(
print(f"Could not process RAGAS results with llm_usage: {e}")
raise

# Save results and generate visualization
if output_json_path:
# Save results to JSON
save_results_to_json(results_df, output_json_path)

# Generate visualization if not disabled
if not skip_chart:
try:
chart_path = generate_bar_chart(output_json_path)
if chart_path:
print(f"Chart generated: {chart_path}")
except Exception as e:
print(f"Chart generation failed: {e}")

return results_df

except Exception as e:
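A note on the `row_dict` change above: flattening `llm_usage` into individual keys means each usage field becomes its own column in the appended CSV, instead of one column holding a stringified dict. A small illustrative sketch of the difference, using hypothetical usage field names such as `prompt_tokens`:

```python
import pandas as pd

# Hypothetical input sample; the actual llm_usage keys depend on the upstream API response
sample = {"llm_usage": {"prompt_tokens": 512, "completion_tokens": 64}}
row_dict = {"user_input": "What does section 3 cover?", "factual_correctness": 0.82}

# Previous behaviour: one column holding the whole dict, which round-trips poorly through CSV
row_nested = {**row_dict, "llm_usage": sample["llm_usage"]}

# New behaviour: each usage field becomes its own top-level key, hence its own CSV column
row_flat = dict(row_dict)
for key, val in sample["llm_usage"].items():
    row_flat[key] = val

print(pd.DataFrame([row_nested]).columns.tolist())
# ['user_input', 'factual_correctness', 'llm_usage']
print(pd.DataFrame([row_flat]).columns.tolist())
# ['user_input', 'factual_correctness', 'prompt_tokens', 'completion_tokens']
```

Keeping the rows flat lets downstream tooling filter or aggregate on token usage without parsing dict strings out of a CSV cell.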
32 changes: 14 additions & 18 deletions backend/tests/Ragas/utils/ragas_evaluate.py
@@ -9,33 +9,30 @@
import sys
import argparse
import asyncio
import pandas as pd
from modules.ragas_evaluation import evaluate_with_ragas


async def run_evaluation(input_path: str, output_path: str, skip_chart: bool = False) -> pd.DataFrame:
async def run_evaluation(input_path: str, output_path: str, llm: str) -> None:
"""
Run the RAGAS evaluation process end-to-end

Args:
input_path: Path to input JSONL file with responses
output_path: Path to save JSON output results
skip_chart: Whether to skip chart generation

Returns:
DataFrame with evaluation results
llm: The LLM model to use for evaluation
"""

# Run RAGAS evaluation
print(f"Running RAGAS evaluation on {input_path}...")
results_df = await evaluate_with_ragas(input_path, output_path, skip_chart)
results_df = await evaluate_with_ragas(input_path)
results_df["llm"] = llm
print("RAGAS evaluation completed.")
print(f"Appending results to CSV file... {output_path}")

print(f"Evaluation complete! Results saved to {output_path}")
if not skip_chart:
chart_path = output_path.replace(".json", "_chart.png")
print(f"Chart saved to {chart_path}")
file_exists = os.path.isfile(output_path)
results_df.to_csv(output_path, mode="a", header=not file_exists, index=False)

return results_df
print("Results appended to CSV file.")


async def main():
@@ -44,13 +41,13 @@ async def main():
# Set up default file paths
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Navigate to Ragas root
default_input = os.path.normpath(os.path.join(base_dir, "files/ragas_evaluation_with_responses.jsonl"))
default_output = os.path.normpath(os.path.join(base_dir, "files/ragas_eval_result.json"))
default_output = os.path.normpath(os.path.join(base_dir, "files/ragas_eval_result.csv"))

# Parse command line arguments
parser = argparse.ArgumentParser(description="Evaluate responses using RAGAS metrics")
parser.add_argument("--llm", "-l", help="LLM model to use for evaluation")
parser.add_argument("--input", "-i", dest="input_jsonl", help="Path to input JSONL file", default=default_input)
parser.add_argument("--output", "-o", help="Path to save JSON output", default=default_output)
parser.add_argument("--no-chart", action="store_true", help="Skip chart visualization")
parser.add_argument("--output", "-o", help="Path to save CSV output", default=default_output)
args = parser.parse_args()

# Validate input file
@@ -60,12 +57,11 @@

print(f"Input file: {args.input_jsonl}")
print(f"Output file: {args.output}")
if args.no_chart:
print("Chart generation is disabled")
print(f"LLM model: {args.llm}")

# Run evaluation
try:
await run_evaluation(args.input_jsonl, args.output, args.no_chart)
await run_evaluation(args.input_jsonl, args.output, args.llm)
except Exception as e:
print(f"Error during evaluation: {str(e)}")
sys.exit(1)
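For reference, the persistence logic added to `run_evaluation` reduces to an append-with-header-once pattern: write the header only when the CSV does not exist yet, otherwise append rows. A self-contained sketch mirroring that pattern (the DataFrames here are illustrative, not the real metric columns):

```python
import os
import pandas as pd

def append_results(results_df: pd.DataFrame, output_path: str) -> None:
    # Write the header only on the first run; later runs append rows beneath it
    file_exists = os.path.isfile(output_path)
    results_df.to_csv(output_path, mode="a", header=not file_exists, index=False)

# Two runs with different LLMs accumulate into the same file
run_a = pd.DataFrame([{"llm": "model-a", "answer_accuracy": 0.91}])
run_b = pd.DataFrame([{"llm": "model-b", "answer_accuracy": 0.87}])
append_results(run_a, "ragas_eval_result.csv")  # creates the file with a header row
append_results(run_b, "ragas_eval_result.csv")  # appends a row, no duplicate header
```

One thing to keep in mind with this approach: `to_csv` in append mode does not check the existing header, so the column set and order need to stay stable across runs (for example, if RAGAS_METRICS changes, older and newer rows will no longer line up under the original header).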