diff --git a/integration_tests/rag_evals/ragas_utils.py b/integration_tests/rag_evals/ragas_utils.py index 686aa46d..ad24859d 100644 --- a/integration_tests/rag_evals/ragas_utils.py +++ b/integration_tests/rag_evals/ragas_utils.py @@ -5,6 +5,8 @@ from .evaluator import Data, PredictedData +# set_llm_cache(SQLiteCache(database_path="./Cache/langchain_ragas_cache.db")) + def create_ragas_dataset(dataset: list[PredictedData]) -> EvaluationDataset: @@ -50,7 +52,9 @@ def ragas_dataset_to_eval(dataset: EvaluationDataset, file: str) -> list[Data]: def run_ragas_evaluations(dataset: EvaluationDataset): - evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini")) + evaluator_llm = LangchainLLMWrapper( + ChatOpenAI(model="gpt-4o-mini", temperature=0.0) + ) answer_correctness_metric = AnswerCorrectness( llm=evaluator_llm, diff --git a/integration_tests/rag_evals/requirements.txt b/integration_tests/rag_evals/requirements.txt index 9bc15a39..d52f8d37 100644 --- a/integration_tests/rag_evals/requirements.txt +++ b/integration_tests/rag_evals/requirements.txt @@ -4,3 +4,5 @@ seaborn mlflow ragas==0.2.7 langchain-openai +langchain-core +langchain-community