Merge pull request #126 from sergiopaniego/main

Solved small issues when running RAG evaluation notebook in Colab
huggingface · Jun 25, 2024 · 536d4b6 · 536d4b6
2 parents aa0bbd7 + 9ee906d
commit 536d4b6
Showing 1 changed file with 9 additions and 3 deletions.
diff --git a/notebooks/en/rag_evaluation.ipynb b/notebooks/en/rag_evaluation.ipynb
@@ -43,7 +43,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install -q torch transformers transformers langchain sentence-transformers tqdm openpyxl openai pandas datasets"
+    "!pip install -q torch transformers transformers langchain sentence-transformers tqdm openpyxl openai pandas datasets langchain-community ragatouille"
    ]
   },
   {
@@ -843,7 +843,7 @@
     "- split after `n` words / characters, but only on sentence boundaries\n",
     "- **recursive split** tries to preserve even more of the document structure, by processing it in a tree-like way, splitting first on the largest units (chapters) then recursively splitting on smaller units (paragraphs, sentences).\n",
     "\n",
-    "To learn more about chunking, I recommend you read [this great notebook](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb) by Greg Kamradt.\n",
+    "To learn more about chunking, I recommend you read [this great notebook](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/tutorials/LevelsOfTextSplitting/5_Levels_Of_Text_Splitting.ipynb) by Greg Kamradt.\n",
     "\n",
     "[This space](https://huggingface.co/spaces/m-ric/chunk_visualizer) lets you visualize how different splitting options affect the chunks you get.\n",
     "\n",
@@ -1051,10 +1051,12 @@
     "\n",
     "repo_id = \"HuggingFaceH4/zephyr-7b-beta\"\n",
     "READER_MODEL_NAME = \"zephyr-7b-beta\"\n",
+    "HF_API_TOKEN = \"\"\n",
     "\n",
     "READER_LLM = HuggingFaceHub(\n",
     "    repo_id=repo_id,\n",
     "    task=\"text-generation\",\n",
+    "    huggingfacehub_api_token=HF_API_TOKEN,\n",
     "    model_kwargs={\n",
     "        \"max_new_tokens\": 512,\n",
     "        \"top_k\": 30,\n",
@@ -1142,6 +1144,8 @@
    },
    "outputs": [],
    "source": [
+    "from langchain_core.language_models import BaseChatModel\n",
+    "\n",
     "def run_rag_tests(\n",
     "    eval_dataset: datasets.Dataset,\n",
     "    llm: BaseChatModel,\n",
@@ -1245,7 +1249,9 @@
    "source": [
     "from langchain.chat_models import ChatOpenAI\n",
     "\n",
-    "eval_chat_model = ChatOpenAI(model=\"gpt-4-1106-preview\", temperature=0)\n",
+    "OPENAI_API_KEY = \"\"\n",
+    "\n",
+    "eval_chat_model = ChatOpenAI(model=\"gpt-4-1106-preview\", temperature=0, openai_api_key=OPENAI_API_KEY)\n",
     "evaluator_name = \"GPT4\"\n",
     "\n",
     "\n",