Merge pull request #61 from aymeric-roucher/main

LLM-as-a-judge cookbook
huggingface · Mar 19, 2024 · 5bcfd4b · 5bcfd4b
2 parents 5b999f6 + 72029f5
commit 5bcfd4b
Show file tree

Hide file tree

Showing 4 changed files with 743 additions and 11 deletions.
diff --git a/notebooks/en/_toctree.yml b/notebooks/en/_toctree.yml
@@ -32,4 +32,6 @@
     title: Create a legal preference dataset
   - local: semantic_cache_chroma_vector_database
     title: Implementing semantic cache to improve a RAG system.
+  - local: llm_judge
+    title: Using LLM-as-a-judge for an automated and versatile evaluation
 
diff --git a/notebooks/en/advanced_rag.ipynb b/notebooks/en/advanced_rag.ipynb
@@ -6,7 +6,7 @@
     "id": "hUCaGdAj9-9F"
    },
    "source": [
-    "# Advanced RAG on HuggingFace documentation using langchain\n",
+    "# Advanced RAG on HuggingFace documentation using LangChain\n",
     "_Authored by: [Aymeric Roucher](https://huggingface.co/m-ric)_"
    ]
   },
@@ -495,7 +495,9 @@
     "] + [query_vector]\n",
     "\n",
     "# fit the data (The index of transformed data corresponds to the index of the original data)\n",
-    "documents_projected = embedding_projector.fit_transform(np.array(embeddings_2d), init=\"pca\")"
+    "documents_projected = embedding_projector.fit_transform(\n",
+    "    np.array(embeddings_2d), init=\"pca\"\n",
+    ")"
    ]
   },
   {
@@ -544,7 +546,8 @@
     "    height=700,\n",
     ")\n",
     "fig.update_traces(\n",
-    "    marker=dict(opacity=1, line=dict(width=0, color=\"DarkSlateGrey\")), selector=dict(mode=\"markers\")\n",
+    "    marker=dict(opacity=1, line=dict(width=0, color=\"DarkSlateGrey\")),\n",
+    "    selector=dict(mode=\"markers\"),\n",
     ")\n",
     "fig.update_layout(\n",
     "    legend_title_text=\"<b>Chunk source</b>\",\n",
@@ -598,7 +601,9 @@
    "source": [
     "print(f\"\\nStarting retrieval for {user_query=}...\")\n",
     "retrieved_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=user_query, k=5)\n",
-    "print(\"\\n==================================Top document==================================\")\n",
+    "print(\n",
+    "    \"\\n==================================Top document==================================\"\n",
+    ")\n",
     "print(retrieved_docs[0].page_content)\n",
     "print(\"==================================Metadata==================================\")\n",
     "print(retrieved_docs[0].metadata)"
@@ -679,7 +684,9 @@
     "    bnb_4bit_quant_type=\"nf4\",\n",
     "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
     ")\n",
-    "model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config)\n",
+    "model = AutoModelForCausalLM.from_pretrained(\n",
+    "    READER_MODEL_NAME, quantization_config=bnb_config\n",
+    ")\n",
     "tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)\n",
     "\n",
     "READER_LLM = pipeline(\n",
@@ -880,7 +887,9 @@
     "    doc.page_content for doc in retrieved_docs\n",
     "]  # we only need the text of the documents\n",
     "context = \"\\nExtracted documents:\\n\"\n",
-    "context += \"\".join([f\"Document {str(i)}:::\\n\" + doc for i, doc in enumerate(retrieved_docs_text)])\n",
+    "context += \"\".join(\n",
+    "    [f\"Document {str(i)}:::\\n\" + doc for i, doc in enumerate(retrieved_docs_text)]\n",
+    ")\n",
     "\n",
     "final_prompt = RAG_PROMPT_TEMPLATE.format(\n",
     "    question=\"How to create a pipeline object?\", context=context\n",
@@ -949,7 +958,9 @@
     ") -> Tuple[str, List[LangchainDocument]]:\n",
     "    # Gather documents with retriever\n",
     "    print(\"=> Retrieving documents...\")\n",
-    "    relevant_docs = knowledge_index.similarity_search(query=question, k=num_retrieved_docs)\n",
+    "    relevant_docs = knowledge_index.similarity_search(\n",
+    "        query=question, k=num_retrieved_docs\n",
+    "    )\n",
     "    relevant_docs = [doc.page_content for doc in relevant_docs]  # keep only the text\n",
     "\n",
     "    # Optionally rerank results\n",
@@ -962,7 +973,9 @@
     "\n",
     "    # Build the final prompt\n",
     "    context = \"\\nExtracted documents:\\n\"\n",
-    "    context += \"\".join([f\"Document {str(i)}:::\\n\" + doc for i, doc in enumerate(relevant_docs)])\n",
+    "    context += \"\".join(\n",
+    "        [f\"Document {str(i)}:::\\n\" + doc for i, doc in enumerate(relevant_docs)]\n",
+    "    )\n",
     "\n",
     "    final_prompt = RAG_PROMPT_TEMPLATE.format(question=question, context=context)\n",
     "\n",
@@ -1221,6 +1234,9 @@
   }
  ],
  "metadata": {
+  "colab": {
+   "provenance": []
+  },
   "kernelspec": {
    "display_name": "ml2",
    "language": "python",
@@ -1237,9 +1253,6 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.10.9"
-  },
-  "colab": {
-   "provenance": []
   }
  },
  "nbformat": 4,

diff --git a/notebooks/en/index.md b/notebooks/en/index.md
@@ -7,6 +7,7 @@ applications and solving various machine learning tasks using open-source tools
 
 Check out the recently added notebooks:
 
+- [Using LLM-as-a-judge 🧑‍⚖️ for an automated and versatile evaluation](llm_judge)
 - [Create a legal preference dataset](pipeline_notus_instructions_preferences_legal)
 - [Suggestions for Data Annotation with SetFit in Zero-shot Text Classification](labelling_feedback_setfit)
 - [Implementing semantic cache to improve a RAG system](semantic_cache_chroma_vector_database)