From c88faf50a150e3b92981dadde4f91a1e420c50e0 Mon Sep 17 00:00:00 2001
From: rnckp <mail@tv07.com>
Date: Mon, 1 Apr 2024 14:08:32 +0200
Subject: [PATCH 1/3] Fix typos and update several links

---
 notebooks/en/advanced_rag.ipynb | 92 ++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 46 deletions(-)
diff --git a/notebooks/en/advanced_rag.ipynb b/notebooks/en/advanced_rag.ipynb
index d87f144e..3bbb5424 100644
--- a/notebooks/en/advanced_rag.ipynb
+++ b/notebooks/en/advanced_rag.ipynb
@@ -6,7 +6,7 @@
     "id": "hUCaGdAj9-9F"
    },
    "source": [
-    "# Advanced RAG on HuggingFace documentation using LangChain\n",
+    "# Advanced RAG on Hugging Face documentation using LangChain\n",
     "_Authored by: [Aymeric Roucher](https://huggingface.co/m-ric)_"
    ]
   },
@@ -18,9 +18,9 @@
    "source": [
     "This notebook demonstrates how you can build an advanced RAG (Retrieval Augmented Generation) for answering a user's question about a specific knowledge base (here, the HuggingFace documentation), using LangChain.\n",
     "\n",
-    "For an introduction to RAG, you can check [this other cookbook](rag_zephyr_langchain)!\n",
+    "For an introduction to RAG, you can check [this other cookbook](rag_zephyr_langchain.ipynb)!\n",
     "\n",
-    "RAG systems are complex, with many moving parts: here a RAG diagram, where we noted in blue all possibilities for system enhancement:\n",
+    "RAG systems are complex, with many moving parts: here is a RAG diagram, where we noted in blue all possibilities for system enhancement:\n",
     "\n",
     "<img src=\"https://huggingface.co/datasets/huggingface/cookbook-images/resolve/main/RAG_workflow.png\" height=\"700\">\n",
     "\n",
@@ -70,7 +70,7 @@
     "\n",
     "pd.set_option(\n",
     "    \"display.max_colwidth\", None\n",
-    ")  # this will be helpful when visualizing retriever outputs"
+    ")  # This will be helpful when visualizing retriever outputs."
    ]
   },
   {
@@ -122,13 +122,13 @@
     "\n",
     "These snippets will then be fed to the Reader Model to help it generate its answer.\n",
     "\n",
-    "So __our objective here is, given a user question, to find the most snippets from our knowledge base to answer that question.__\n",
+    "So __our objective here is, given a user question, to find the most relevant snippets from our knowledge base to answer that question.__\n",
     "\n",
     "This is a wide objective, it leaves open some questions. How many snippets should we retrieve? This parameter will be named `top_k`.\n",
     "\n",
     "How long should these snippets be? This is called the `chunk size`. There's no one-size-fits-all answers, but here are a few elements:\n",
     "- 🔀 Your `chunk size` is allowed to vary from one snippet to the other.\n",
-    "- Since there will always be some noise in your retrieval, increasing the `top_k` increases the chance to get relevant elements in your retrieved snippets. 🎯 Shooting more arrows increases your probability to hit your target.\n",
+    "- Since there will always be some noise in your retrieval, increasing the `top_k` increases the chance to get relevant elements in your retrieved snippets. 🎯 Shooting more arrows increases your probability of hitting your target.\n",
     "- Meanwhile, the summed length of your retrieved documents should not be too high: for instance, for most current models 16k tokens will probably drown your Reader model in information due to [Lost-in-the-middle phenomenon](https://huggingface.co/papers/2307.03172). 🎯 Give your reader model only the most relevant insights, not a huge pile of books!\n",
     "\n",
     "\n",
@@ -144,15 +144,15 @@
     "### 1.1 Split the documents into chunks\n",
     "\n",
     "- In this part, __we split the documents from our knowledge base into smaller chunks__ which will be the snippets on which the reader LLM will base its answer.\n",
-    "- The goal is to prepare a collection of **semantically relevant snippets**. So their size should be adapted to precise ideas: too small will truncate ideas, too large will dilute them.\n",
+    "- The goal is to prepare a collection of **semantically relevant snippets**. So their size should be adapted to precise ideas: too small will truncate ideas, and too large will dilute them.\n",
     "\n",
-    "💡 _Many options exist for text splitting: splitting on words, on sentence boundaries, recursive chunking that processes documents in a tree-like way to preserve structure information... To learn more about chunking, I recommend you read [this great notebook](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb) by Greg Kamradt._\n",
+    "💡 _Many options exist for text splitting: splitting on words, on sentence boundaries, recursive chunking that processes documents in a tree-like way to preserve structure information... To learn more about chunking, I recommend you read [this great notebook](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/tutorials/LevelsOfTextSplitting/5_Levels_Of_Text_Splitting.ipynb) by Greg Kamradt._\n",
     "\n",
     "\n",
-    "- **Recursive chunking** breaks down the text into smaller parts step by step using a given list of separators sorted from the most important to the least important separator. If the first split doesn't give the right size or shape chunks, the method repeats itself on the new chunks using a different separator. For instance with the list of separators `[\"\\n\\n\", \"\\n\", \".\", \"\"]`:\n",
+    "- **Recursive chunking** breaks down the text into smaller parts step by step using a given list of separators sorted from the most important to the least important separator. If the first split doesn't give the right size or shape of chunks, the method repeats itself on the new chunks using a different separator. For instance with the list of separators `[\"\\n\\n\", \"\\n\", \".\", \"\"]`:\n",
     "    - The method will first break down the document wherever there is a double line break `\"\\n\\n\"`.\n",
     "    - Resulting documents will be split again on simple line breaks `\"\\n\"`, then on sentence ends `\".\"`.\n",
-    "    - And finally, if some chunks are still too big, they will be split whenever they overflow the maximum size.\n",
+    "    - Finally, if some chunks are still too big, they will be split whenever they overflow the maximum size.\n",
     "\n",
     "- With this method, the global structure is well preserved, at the expense of getting slight variations in chunk size.\n",
     "\n",
@@ -173,7 +173,7 @@
    "source": [
     "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
     "\n",
-    "# We use a hierarchical list of separators specifically tailored for splitting Markdown documents\n",
+    "# We use a hierarchical list of separators specifically tailored for splitting Markdown documents.\n",
     "# This list is taken from LangChain's MarkdownTextSplitter class.\n",
     "MARKDOWN_SEPARATORS = [\n",
     "    \"\\n#{1,6} \",\n",
@@ -188,10 +188,10 @@
     "]\n",
     "\n",
     "text_splitter = RecursiveCharacterTextSplitter(\n",
-    "    chunk_size=1000,  # the maximum number of characters in a chunk: we selected this value arbitrarily\n",
-    "    chunk_overlap=100,  # the number of characters to overlap between chunks\n",
-    "    add_start_index=True,  # If `True`, includes chunk's start index in metadata\n",
-    "    strip_whitespace=True,  # If `True`, strips whitespace from the start and end of every document\n",
+    "    chunk_size=1000,  # The maximum number of characters in a chunk: we selected this value arbitrarily.\n",
+    "    chunk_overlap=100,  # The number of characters to overlap between chunks.\n",
+    "    add_start_index=True,  # If `True`, includes chunk's start index in metadata.\n",
+    "    strip_whitespace=True,  # If `True`, strips whitespace from the start and end of every document.\n",
     "    separators=MARKDOWN_SEPARATORS,\n",
     ")\n",
     "\n",
@@ -206,9 +206,9 @@
     "id": "d5jJUMgb9-9M"
    },
    "source": [
-    "We also have to keep in mind that when embedding documents, we will use an embedding model that has accepts a certain maximum sequence length `max_seq_length`.\n",
+    "We also have to keep in mind that when embedding documents, we will use an embedding model that accepts a certain maximum sequence length `max_seq_length`.\n",
     "\n",
-    "So we should make sure that our chunk sizes are below this limit, because any longer chunk will be truncated before processing, thus losing relevancy."
+    "So we should make sure that our chunk sizes are below this limit because any longer chunk will be truncated before processing, thus losing relevancy."
    ]
   },
   {
@@ -269,7 +269,7 @@
     "tokenizer = AutoTokenizer.from_pretrained(\"thenlper/gte-small\")\n",
     "lengths = [len(tokenizer.encode(doc.page_content)) for doc in tqdm(docs_processed)]\n",
     "\n",
-    "# Plot the distrubution of document lengths, counted as the number of tokens\n",
+    "# Plot the distrubution of document lengths, counted as the number of tokens.\n",
     "fig = pd.Series(lengths).hist()\n",
     "plt.title(\"Distribution of document lengths in the knowledge base (in count of tokens)\")\n",
     "plt.show()"
@@ -354,7 +354,7 @@
     "    for doc in knowledge_base:\n",
     "        docs_processed += text_splitter.split_documents([doc])\n",
     "\n",
-    "    # Remove duplicates\n",
+    "    # Remove duplicates.\n",
     "    unique_texts = {}\n",
     "    docs_processed_unique = []\n",
     "    for doc in docs_processed:\n",
@@ -366,12 +366,12 @@
     "\n",
     "\n",
     "docs_processed = split_documents(\n",
-    "    512,  # We choose a chunk size adapted to our model\n",
+    "    512,  # We choose a chunk size adapted to our model.\n",
     "    RAW_KNOWLEDGE_BASE,\n",
     "    tokenizer_name=EMBEDDING_MODEL_NAME,\n",
     ")\n",
     "\n",
-    "# Let's visualize the chunk sizes we would have in tokens from a common model\n",
+    "# Let's visualize the chunk sizes we would have in tokens from a common model.\n",
     "from transformers import AutoTokenizer\n",
     "\n",
     "tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)\n",
@@ -398,23 +398,23 @@
    "source": [
     "### 1.2 Building the vector database\n",
     "\n",
-    "We want to compute the embeddings for all the chunks of our knowledge base: to learn more on sentence embeddings, we recommend reading [this guide](https://osanseviero.github.io/hackerllama/blog/posts/sentence_embeddings/).\n",
+    "We want to compute the embeddings for all the chunks of our knowledge base: to learn more about sentence embeddings, we recommend reading [this guide](https://osanseviero.github.io/hackerllama/blog/posts/sentence_embeddings/).\n",
     "\n",
-    "#### How does retrieval work ?\n",
+    "#### How does retrieval work?\n",
     "\n",
-    "Once the chunks are all embedded, we store them into a vector database. When the user types in a query, it gets embedded by the same model previously used, and a similarity search returns the closest documents from the vector database.\n",
+    "Once the chunks are all embedded, we store them in a vector database. When the user types in a query, it gets embedded by the same model previously used, and a similarity search returns the closest documents from the vector database.\n",
     "\n",
-    "The technical challenge is thus, given a query vector, to quickly find the nearest neighbours of this vector in the vector database. To do this, we need to choose two things: a distance, and a search algorithm to find the nearest neighbors quickly within a database of thousands of records.\n",
+    "The technical challenge is thus, given a query vector, to quickly find the nearest neighbors of this vector in the vector database. To do this, we need to choose two things: a distance, and a search algorithm to find the nearest neighbors quickly within a database of thousands of records.\n",
     "\n",
     "##### Nearest Neighbor search algorithm\n",
     "\n",
-    "There are plentiful choices for the nearest neighbor search algorithm: we go with Facebook's [FAISS](https://github.com/facebookresearch/faiss), since FAISS is performant enough for most use cases, and it is well known thus widely implemented.\n",
+    "There are plentiful choices for the nearest neighbor search algorithm: we go with Facebook's [FAISS](https://github.com/facebookresearch/faiss) since FAISS is performant enough for most use cases, and it is well known and thus widely implemented.\n",
     "\n",
     "##### Distances\n",
     "\n",
     "Regarding distances, you can find a good guide [here](https://osanseviero.github.io/hackerllama/blog/posts/sentence_embeddings/#distance-between-embeddings). In short:\n",
     "\n",
-    "- **Cosine similarity** computes similarity between two vectors as the cosinus of their relative angle: it allows us to compare vector directions are regardless of their magnitude. Using it requires to normalize all vectors, to rescale them into unit norm.\n",
+    "- **Cosine similarity** computes the similarity between two vectors as the cosine of their relative angle: it allows us to compare vector directions regardless of their magnitude. Using it requires normalizing all vectors, to rescale them into unit norm.\n",
     "- **Dot product** takes into account magnitude, with the sometimes undesirable effect that increasing a vector's length will make it more similar to all others.\n",
     "- **Euclidean distance** is the distance between the ends of vectors.\n",
     "\n",
@@ -441,7 +441,7 @@
     "    model_name=EMBEDDING_MODEL_NAME,\n",
     "    multi_process=True,\n",
     "    model_kwargs={\"device\": \"cuda\"},\n",
-    "    encode_kwargs={\"normalize_embeddings\": True},  # set True for cosine similarity\n",
+    "    encode_kwargs={\"normalize_embeddings\": True},  # Set `True` for cosine similarity.\n",
     ")\n",
     "\n",
     "KNOWLEDGE_VECTOR_DATABASE = FAISS.from_documents(\n",
@@ -468,7 +468,7 @@
    },
    "outputs": [],
    "source": [
-    "# embed a user query in the same space\n",
+    "# Embed a user query in the same space.\n",
     "user_query = \"How to create a pipeline object?\"\n",
     "query_vector = embedding_model.embed_query(user_query)"
    ]
@@ -494,7 +494,7 @@
     "    for idx in range(len(docs_processed))\n",
     "] + [query_vector]\n",
     "\n",
-    "# fit the data (The index of transformed data corresponds to the index of the original data)\n",
+    "# Fit the data (the index of transformed data corresponds to the index of the original data).\n",
     "documents_projected = embedding_projector.fit_transform(\n",
     "    np.array(embeddings_2d), init=\"pca\"\n",
     ")"
@@ -532,7 +532,7 @@
     "    ]\n",
     ")\n",
     "\n",
-    "# visualize the embedding\n",
+    "# Visualize the embedding.\n",
     "fig = px.scatter(\n",
     "    df,\n",
     "    x=\"x\",\n",
@@ -565,9 +565,9 @@
     "<img src=\"https://huggingface.co/datasets/huggingface/cookbook-images/resolve/main/PaCMAP_embeddings.png\" height=\"700\">\n",
     "\n",
     "\n",
-    "➡️ On the graph above, you can see a spatial representation of the kowledge base documents. As the vector embeddings represent the document's meaning, their closeness in meaning should be reflected in their embedding's closeness.\n",
+    "➡️ On the graph above, you can see a spatial representation of the knowledge base documents. As the vector embeddings represent the document's meaning, their closeness in meaning should be reflected in their embedding's closeness.\n",
     "\n",
-    "The user query's embedding is also shown : we want to find the `k` document that have the closest meaning, thus we pick the `k` closest vectors.\n",
+    "The user query's embedding is also shown: we want to find the `k` documents that have the closest meaning, thus we pick the `k` closest vectors.\n",
     "\n",
     "In the LangChain vector database implementation, this search operation is performed by the method `vector_database.similarity_search(query)`.\n",
     "\n",
@@ -619,9 +619,9 @@
     "\n",
     "In this part, the __LLM Reader reads the retrieved context to formulate its answer.__\n",
     "\n",
-    "There are actually substeps that can all be tuned:\n",
+    "There are substeps that can all be tuned:\n",
     "1. The content of the retrieved documents is aggregated together into the \"context\", with many processing options like _prompt compression_.\n",
-    "2. The context and the user query are aggregated into a prompt then given to the LLM to generate its answer."
+    "2. The context and the user query are aggregated into a prompt and then given to the LLM to generate its answer."
    ]
   },
   {
@@ -632,8 +632,8 @@
    "source": [
     "### 2.1. Reader model\n",
     "\n",
-    "The choice of a reader model is important on a few aspects:\n",
-    "- the reader model's `max_seq_length` must accomodate our prompt, which includes the context output by the retriever call: the context consists in 5 documents of 512 tokens each, so we aim for a context length of 4k tokens at least.\n",
+    "The choice of a reader model is important in a few aspects:\n",
+    "- the reader model's `max_seq_length` must accommodate our prompt, which includes the context output by the retriever call: the context consists of 5 documents of 512 tokens each, so we aim for a context length of 4k tokens at least.\n",
     "- the reader model\n",
     "\n",
     "For this example, we chose [`HuggingFaceH4/zephyr-7b-beta`](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta), a small but powerful model.\n",
@@ -885,7 +885,7 @@
    "source": [
     "retrieved_docs_text = [\n",
     "    doc.page_content for doc in retrieved_docs\n",
-    "]  # we only need the text of the documents\n",
+    "]  # We only need the text of the documents.\n",
     "context = \"\\nExtracted documents:\\n\"\n",
     "context += \"\".join(\n",
     "    [f\"Document {str(i)}:::\\n\" + doc for i, doc in enumerate(retrieved_docs_text)]\n",
@@ -895,7 +895,7 @@
     "    question=\"How to create a pipeline object?\", context=context\n",
     ")\n",
     "\n",
-    "# Redact an answer\n",
+    "# Redact an answer.\n",
     "answer = READER_LLM(final_prompt)[0][\"generated_text\"]\n",
     "print(answer)"
    ]
@@ -956,14 +956,14 @@
     "    num_retrieved_docs: int = 30,\n",
     "    num_docs_final: int = 5,\n",
     ") -> Tuple[str, List[LangchainDocument]]:\n",
-    "    # Gather documents with retriever\n",
+    "    # Gather documents with retriever.\n",
     "    print(\"=> Retrieving documents...\")\n",
     "    relevant_docs = knowledge_index.similarity_search(\n",
     "        query=question, k=num_retrieved_docs\n",
     "    )\n",
-    "    relevant_docs = [doc.page_content for doc in relevant_docs]  # keep only the text\n",
+    "    relevant_docs = [doc.page_content for doc in relevant_docs]  # Keep only the text.\n",
     "\n",
-    "    # Optionally rerank results\n",
+    "    # Optionally rerank results.\n",
     "    if reranker:\n",
     "        print(\"=> Reranking documents...\")\n",
     "        relevant_docs = reranker.rerank(question, relevant_docs, k=num_docs_final)\n",
@@ -971,7 +971,7 @@
     "\n",
     "    relevant_docs = relevant_docs[:num_docs_final]\n",
     "\n",
-    "    # Build the final prompt\n",
+    "    # Build the final prompt.\n",
     "    context = \"\\nExtracted documents:\\n\"\n",
     "    context += \"\".join(\n",
     "        [f\"Document {str(i)}:::\\n\" + doc for i, doc in enumerate(relevant_docs)]\n",
@@ -979,7 +979,7 @@
     "\n",
     "    final_prompt = RAG_PROMPT_TEMPLATE.format(question=question, context=context)\n",
     "\n",
-    "    # Redact an answer\n",
+    "    # Redact an answer.\n",
     "    print(\"=> Generating answer...\")\n",
     "    answer = llm(final_prompt)[0][\"generated_text\"]\n",
     "\n",
@@ -1193,7 +1193,7 @@
     "id": "w6iNo7lY9-9S"
    },
    "source": [
-    "✅ We now have a fully functional, performant RAG sytem. That's it for today! Congratulations for making it to the end 🥳\n",
+    "✅ We now have a fully functional, performant RAG system. That's it for today! Congratulations on making it to the end 🥳\n",
     "\n",
     "\n",
     "# To go further 🗺️\n",
@@ -1202,7 +1202,7 @@
     "\n",
     "### Setting up an evaluation pipeline\n",
     "\n",
-    "- 💬 \"You cannot improve the model performance that you do not measure\", said Gandhi... or at least Llama2 told me he said it. Anyway, you should absolutely start by measuring performance: this means building a small evaluation dataset, then monitor the performance of your RAG system on this evaluation dataset.\n",
+    "- 💬 \"You cannot improve the model performance that you do not measure\", said Gandhi... or at least Llama2 told me he said it. Anyway, you should absolutely start by measuring performance: this means building a small evaluation dataset, and then monitoring the performance of your RAG system on this evaluation dataset.\n",
     "\n",
     "### Improving the retriever\n",
     "\n",

From ed65f713e10c21b5c451fa388d5779c1cf2420a2 Mon Sep 17 00:00:00 2001
From: rnckp <mail@tv07.com>
Date: Mon, 8 Apr 2024 12:49:50 +0200
Subject: [PATCH 2/3] Remove addition of `.ipynb` that breaks the link

---
 notebooks/en/advanced_rag.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebooks/en/advanced_rag.ipynb b/notebooks/en/advanced_rag.ipynb
index 3bbb5424..9530cf6a 100644
--- a/notebooks/en/advanced_rag.ipynb
+++ b/notebooks/en/advanced_rag.ipynb
@@ -18,7 +18,7 @@
    "source": [
     "This notebook demonstrates how you can build an advanced RAG (Retrieval Augmented Generation) for answering a user's question about a specific knowledge base (here, the HuggingFace documentation), using LangChain.\n",
     "\n",
-    "For an introduction to RAG, you can check [this other cookbook](rag_zephyr_langchain.ipynb)!\n",
+    "For an introduction to RAG, you can check [this other cookbook](rag_zephyr_langchain)!\n",
     "\n",
     "RAG systems are complex, with many moving parts: here is a RAG diagram, where we noted in blue all possibilities for system enhancement:\n",
     "\n",

From 88ff73c4fd3e3d9a5e222f16d90b6d6c6aece594 Mon Sep 17 00:00:00 2001
From: rnckp <mail@tv07.com>
Date: Fri, 19 Apr 2024 20:19:55 +0200
Subject: [PATCH 3/3] Remove all dots at end of comments

- Fix typo in comment in code cell of first distribution plot
---
 notebooks/en/advanced_rag.ipynb | 46 ++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/notebooks/en/advanced_rag.ipynb b/notebooks/en/advanced_rag.ipynb
index 9530cf6a..3030a5e1 100644
--- a/notebooks/en/advanced_rag.ipynb
+++ b/notebooks/en/advanced_rag.ipynb
@@ -70,7 +70,7 @@
     "\n",
     "pd.set_option(\n",
     "    \"display.max_colwidth\", None\n",
-    ")  # This will be helpful when visualizing retriever outputs."
+    ")  # This will be helpful when visualizing retriever outputs"
    ]
   },
   {
@@ -173,8 +173,8 @@
    "source": [
     "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
     "\n",
-    "# We use a hierarchical list of separators specifically tailored for splitting Markdown documents.\n",
-    "# This list is taken from LangChain's MarkdownTextSplitter class.\n",
+    "# We use a hierarchical list of separators specifically tailored for splitting Markdown documents\n",
+    "# This list is taken from LangChain's MarkdownTextSplitter class\n",
     "MARKDOWN_SEPARATORS = [\n",
     "    \"\\n#{1,6} \",\n",
     "    \"```\\n\",\n",
@@ -188,10 +188,10 @@
     "]\n",
     "\n",
     "text_splitter = RecursiveCharacterTextSplitter(\n",
-    "    chunk_size=1000,  # The maximum number of characters in a chunk: we selected this value arbitrarily.\n",
-    "    chunk_overlap=100,  # The number of characters to overlap between chunks.\n",
-    "    add_start_index=True,  # If `True`, includes chunk's start index in metadata.\n",
-    "    strip_whitespace=True,  # If `True`, strips whitespace from the start and end of every document.\n",
+    "    chunk_size=1000,  # The maximum number of characters in a chunk: we selected this value arbitrarily\n",
+    "    chunk_overlap=100,  # The number of characters to overlap between chunks\n",
+    "    add_start_index=True,  # If `True`, includes chunk's start index in metadata\n",
+    "    strip_whitespace=True,  # If `True`, strips whitespace from the start and end of every document\n",
     "    separators=MARKDOWN_SEPARATORS,\n",
     ")\n",
     "\n",
@@ -259,7 +259,7 @@
    "source": [
     "from sentence_transformers import SentenceTransformer\n",
     "\n",
-    "# To get the value of the max sequence_length, we will query the underlying `SentenceTransformer` object used in the RecursiveCharacterTextSplitter.\n",
+    "# To get the value of the max sequence_length, we will query the underlying `SentenceTransformer` object used in the RecursiveCharacterTextSplitter\n",
     "print(\n",
     "    f\"Model's maximum sequence length: {SentenceTransformer('thenlper/gte-small').max_seq_length}\"\n",
     ")\n",
@@ -269,7 +269,7 @@
     "tokenizer = AutoTokenizer.from_pretrained(\"thenlper/gte-small\")\n",
     "lengths = [len(tokenizer.encode(doc.page_content)) for doc in tqdm(docs_processed)]\n",
     "\n",
-    "# Plot the distrubution of document lengths, counted as the number of tokens.\n",
+    "# Plot the distribution of document lengths, counted as the number of tokens\n",
     "fig = pd.Series(lengths).hist()\n",
     "plt.title(\"Distribution of document lengths in the knowledge base (in count of tokens)\")\n",
     "plt.show()"
@@ -354,7 +354,7 @@
     "    for doc in knowledge_base:\n",
     "        docs_processed += text_splitter.split_documents([doc])\n",
     "\n",
-    "    # Remove duplicates.\n",
+    "    # Remove duplicates\n",
     "    unique_texts = {}\n",
     "    docs_processed_unique = []\n",
     "    for doc in docs_processed:\n",
@@ -366,12 +366,12 @@
     "\n",
     "\n",
     "docs_processed = split_documents(\n",
-    "    512,  # We choose a chunk size adapted to our model.\n",
+    "    512,  # We choose a chunk size adapted to our model\n",
     "    RAW_KNOWLEDGE_BASE,\n",
     "    tokenizer_name=EMBEDDING_MODEL_NAME,\n",
     ")\n",
     "\n",
-    "# Let's visualize the chunk sizes we would have in tokens from a common model.\n",
+    "# Let's visualize the chunk sizes we would have in tokens from a common model\n",
     "from transformers import AutoTokenizer\n",
     "\n",
     "tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)\n",
@@ -441,7 +441,7 @@
     "    model_name=EMBEDDING_MODEL_NAME,\n",
     "    multi_process=True,\n",
     "    model_kwargs={\"device\": \"cuda\"},\n",
-    "    encode_kwargs={\"normalize_embeddings\": True},  # Set `True` for cosine similarity.\n",
+    "    encode_kwargs={\"normalize_embeddings\": True},  # Set `True` for cosine similarity\n",
     ")\n",
     "\n",
     "KNOWLEDGE_VECTOR_DATABASE = FAISS.from_documents(\n",
@@ -468,7 +468,7 @@
    },
    "outputs": [],
    "source": [
-    "# Embed a user query in the same space.\n",
+    "# Embed a user query in the same space\n",
     "user_query = \"How to create a pipeline object?\"\n",
     "query_vector = embedding_model.embed_query(user_query)"
    ]
@@ -494,7 +494,7 @@
     "    for idx in range(len(docs_processed))\n",
     "] + [query_vector]\n",
     "\n",
-    "# Fit the data (the index of transformed data corresponds to the index of the original data).\n",
+    "# Fit the data (the index of transformed data corresponds to the index of the original data)\n",
     "documents_projected = embedding_projector.fit_transform(\n",
     "    np.array(embeddings_2d), init=\"pca\"\n",
     ")"
@@ -532,7 +532,7 @@
     "    ]\n",
     ")\n",
     "\n",
-    "# Visualize the embedding.\n",
+    "# Visualize the embedding\n",
     "fig = px.scatter(\n",
     "    df,\n",
     "    x=\"x\",\n",
@@ -885,7 +885,7 @@
    "source": [
     "retrieved_docs_text = [\n",
     "    doc.page_content for doc in retrieved_docs\n",
-    "]  # We only need the text of the documents.\n",
+    "]  # We only need the text of the documents\n",
     "context = \"\\nExtracted documents:\\n\"\n",
     "context += \"\".join(\n",
     "    [f\"Document {str(i)}:::\\n\" + doc for i, doc in enumerate(retrieved_docs_text)]\n",
@@ -895,7 +895,7 @@
     "    question=\"How to create a pipeline object?\", context=context\n",
     ")\n",
     "\n",
-    "# Redact an answer.\n",
+    "# Redact an answer\n",
     "answer = READER_LLM(final_prompt)[0][\"generated_text\"]\n",
     "print(answer)"
    ]
@@ -956,14 +956,14 @@
     "    num_retrieved_docs: int = 30,\n",
     "    num_docs_final: int = 5,\n",
     ") -> Tuple[str, List[LangchainDocument]]:\n",
-    "    # Gather documents with retriever.\n",
+    "    # Gather documents with retriever\n",
     "    print(\"=> Retrieving documents...\")\n",
     "    relevant_docs = knowledge_index.similarity_search(\n",
     "        query=question, k=num_retrieved_docs\n",
     "    )\n",
-    "    relevant_docs = [doc.page_content for doc in relevant_docs]  # Keep only the text.\n",
+    "    relevant_docs = [doc.page_content for doc in relevant_docs]  # Keep only the text\n",
     "\n",
-    "    # Optionally rerank results.\n",
+    "    # Optionally rerank results\n",
     "    if reranker:\n",
     "        print(\"=> Reranking documents...\")\n",
     "        relevant_docs = reranker.rerank(question, relevant_docs, k=num_docs_final)\n",
@@ -971,7 +971,7 @@
     "\n",
     "    relevant_docs = relevant_docs[:num_docs_final]\n",
     "\n",
-    "    # Build the final prompt.\n",
+    "    # Build the final prompt\n",
     "    context = \"\\nExtracted documents:\\n\"\n",
     "    context += \"\".join(\n",
     "        [f\"Document {str(i)}:::\\n\" + doc for i, doc in enumerate(relevant_docs)]\n",
@@ -979,7 +979,7 @@
     "\n",
     "    final_prompt = RAG_PROMPT_TEMPLATE.format(question=question, context=context)\n",
     "\n",
-    "    # Redact an answer.\n",
+    "    # Redact an answer\n",
     "    print(\"=> Generating answer...\")\n",
     "    answer = llm(final_prompt)[0][\"generated_text\"]\n",
     "\n",