Commit e0f268d

docs: Add end-to-end docs. Fix small bugs
1 parent: 1ce2297

File tree: 13 files changed (+81 −43 lines)

3-feature-pipeline/db.py

Lines changed: 2 additions & 2 deletions

@@ -62,8 +62,8 @@ def search(
         self,
         collection_name: str,
         query_vector: list,
-        query_filter: models.Filter,
-        limit: int,
+        query_filter: models.Filter | None = None,
+        limit: int = 3,
     ) -> list:
         return self._instance.search(
             collection_name=collection_name,
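
With the filter optional and the limit defaulted, callers can now run a plain semantic search without building a metadata filter first. A minimal usage sketch, assuming the connector can be constructed with no arguments and using a hypothetical collection name and embedding (neither appears in this commit):

```python
from db import QdrantDatabaseConnector

client = QdrantDatabaseConnector()  # assumed default construction

# Hypothetical query embedding produced upstream by the embedding model.
query_vector = [0.12, -0.03, 0.44]

# No query_filter and no explicit limit: the new defaults (None and 3) apply.
hits = client.search(collection_name="vector_posts", query_vector=query_vector)

# A filtered search still works as before by passing a models.Filter explicitly.
```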

3-feature-pipeline/finetuning/generate_data.py

Lines changed: 2 additions & 2 deletions

@@ -4,7 +4,7 @@
 from comet_ml import Artifact, Experiment

 from utils.logging import get_logger
-from db.qdrant import QdrantDatabaseConnector
+from db import QdrantDatabaseConnector
 from finetuning.file_handler import FileHandler
 from finetuning.llm_communication import GptCommunicator
 from config import settings

@@ -126,7 +126,7 @@ def fetch_all_cleaned_content(self, collection_name: str) -> list:
    data_formatter = DataFormatter()
    dataset_generator = DatasetGenerator(file_handler, api_communicator, data_formatter)

-   collections = [("cleaned_articles", "articles"), ("cleaned_posts", "posts")]
+   collections = [("cleaned_articles", "articles"), ("cleaned_posts", "posts"), ("cleaned_repositories", "repositories")]
    for (collection_name, data_type) in collections:
        logger.info("Generating training data.", collection_name=collection_name, data_type=data_type)
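
Each `(collection_name, data_type)` pair ends up as a versioned dataset in Comet ML. A rough sketch of that mapping, using a hypothetical helper (the artifact names match the ones listed in INSTALL_AND_USAGE.md below; the actual code in `generate_data.py` may do this differently):

```python
from comet_ml import Artifact, Experiment


def push_instruct_dataset(experiment: Experiment, data_type: str, file_path: str) -> None:
    # Hypothetical helper: wraps the generated JSON file in a Comet ML artifact
    # named after the data type, e.g. "repositories-instruct-dataset".
    artifact = Artifact(name=f"{data_type}-instruct-dataset", artifact_type="dataset")
    artifact.add(file_path)
    experiment.log_artifact(artifact)
```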

3-feature-pipeline/finetuning/llm_communication.py

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 from openai import OpenAI

 from utils.logging import get_logger
-from ..config import settings
+from config import settings

 MAX_LENGTH = 16384
 SYSTEM_PROMPT = (

3-feature-pipeline/llm/prompt_templates.py

Lines changed: 1 addition & 0 deletions

@@ -37,6 +37,7 @@ class SelfQueryTemplate(BasePromptTemplate):
     prompt: str = """You are an AI language model assistant. Your task is to extract information from a user question.
     The required information that needs to be extracted is the user or author id.
     Your response should consists of only the extracted id (e.g. 1345256), nothing else.
+    If you cannot find the author id, return the string "None".
     User question: {question}"""

     def create_template(self) -> PromptTemplate:

3-feature-pipeline/rag/retriever.py

Lines changed: 12 additions & 9 deletions

@@ -2,7 +2,7 @@

 from utils.logging import get_logger
 import utils
-from db.qdrant import QdrantDatabaseConnector
+from db import QdrantDatabaseConnector
 from qdrant_client import models
 from rag.query_expanison import QueryExpansion
 from rag.reranking import Reranker

@@ -27,7 +27,7 @@ def __init__(self, query: str):
         self._reranker = Reranker()

     def _search_single_query(
-        self, generated_query: str, metadata_filter_value: str, k: int
+        self, generated_query: str, metadata_filter_value: str | None, k: int
     ):
         assert k > 3, "k should be greater than 3"

@@ -44,7 +44,7 @@ def _search_single_query(
                             ),
                         )
                     ]
-                ),
+                ) if metadata_filter_value else None,
                 query_vector=query_vector,
                 limit=k // 3,
             ),

@@ -59,7 +59,7 @@ def _search_single_query(
                             ),
                         )
                     ]
-                ),
+                ) if metadata_filter_value else None,
                 query_vector=query_vector,
                 limit=k // 3,
             ),

@@ -74,7 +74,7 @@ def _search_single_query(
                             ),
                         )
                     ]
-                ),
+                ) if metadata_filter_value else None,
                 query_vector=query_vector,
                 limit=k // 3,
             ),

@@ -92,10 +92,13 @@ def retrieve_top_k(self, k: int, to_expand_to_n_queries: int) -> list:
         )

         author_id = self._metadata_extractor.generate_response(self.query)
-        logger.info(
-            "Successfully extracted the author_id from the query.",
-            author_id=author_id,
-        )
+        if author_id:
+            logger.info(
+                "Successfully extracted the author_id from the query.",
+                author_id=author_id,
+            )
+        else:
+            logger.info("Couldn't extract the author_id from the query.")

         with concurrent.futures.ThreadPoolExecutor() as executor:
             search_tasks = [
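
The recurring `models.Filter(...) if metadata_filter_value else None` pattern can be read as a small standalone helper. A sketch under the assumption that the metadata key is `author_id` (the key name is not visible in these hunks); the `qdrant_client` filter construction mirrors the calls above:

```python
from qdrant_client import models


def build_author_filter(author_id: str | None) -> models.Filter | None:
    # Only constrain the vector search when the self-query step found an id;
    # otherwise return None so the search runs unfiltered.
    if author_id is None:
        return None

    return models.Filter(
        must=[
            models.FieldCondition(
                key="author_id",
                match=models.MatchValue(value=author_id),
            )
        ]
    )
```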

3-feature-pipeline/rag/self_query.py

Lines changed: 5 additions & 2 deletions

@@ -6,7 +6,7 @@

 class SelfQuery:
     @staticmethod
-    def generate_response(query: str) -> str:
+    def generate_response(query: str) -> str | None:
         prompt = SelfQueryTemplate().create_template()
         model = ChatOpenAI(model=settings.OPENAI_MODEL_ID, temperature=0)

@@ -15,6 +15,9 @@ def generate_response(query: str) -> str:
         )

         response = chain.invoke({"question": query})
-        result = response["metadata_filter_value"]
+        result = response.get("metadata_filter_value", "none")
+
+        if result.lower() == "none":
+            return None

         return result
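
A small usage sketch of the patched contract (the example question is illustrative only):

```python
# Returns the extracted id as a string, or None when the LLM answers "None".
author_id = SelfQuery.generate_response("My author_id is 1345256. Draft a post about RAG.")

if author_id is None:
    # No id found: retriever.py now falls back to an unfiltered vector search.
    print("No author_id found in the query.")
else:
    print(f"Filtering retrieval by author_id={author_id}")
```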

3-feature-pipeline/retriever.py

Lines changed: 0 additions & 2 deletions

@@ -11,8 +11,6 @@
 if __name__ == "__main__":
     load_dotenv()
     query = """
-    Hello my author_id is 1.
-
     Could you please draft a LinkedIn post discussing RAG systems?
     I'm particularly interested in how RAG works and how it is integrated with vector DBs and large language models (LLMs).
     """
4-finetuning/README.md

Lines changed: 2 additions & 2 deletions

@@ -223,9 +223,9 @@ verbose: 0
 The project includes a `Makefile` for easy management of common tasks. Here are the main commands you can use:

 - `make help`: Displays help for each make command.
-- `make test`: Runs tests on local-qwak deployment.
+- `make local-test-inference-pipeline`: Runs tests on local-qwak deployment.
 - `make create-qwak-project`: Create a Qwak project to deploy the model.
-- `make deploy`: Triggers a new fine-tuning job to Qwak remotely, using the configuration specified in `build_config.yaml`
+- `make deploy-inference-pipeline`: Triggers a new fine-tuning job to Qwak remotely, using the configuration specified in `build_config.yaml`

 ------

INSTALL_AND_USAGE.md

Lines changed: 33 additions & 0 deletions

@@ -100,14 +100,47 @@ To check that the Qdrant `vector DB` is populated successfully, go to its dashbo
 ### Step 3: RAG retrieval step

 Now that we have some data in our vector DB, let's test out the RAG retriever:
+```shell
+make local-test-retriever
+```
+
+> [!NOTE]
+> Before running this command, check [Qdrant's dashboard](localhost:6333/dashboard) to ensure that your vector DB is populated with data.
+

 ### Step 4: Generate the instruct dataset

+The last step before fine-tuning is to generate an instruct dataset and track it as an artifact in Comet ML. To do so, run:
+```shell
+make local-generate-dataset
+```
+
+> Now open [Comet ML](https://www.comet.com/signup/?utm_source=decoding_ml&utm_medium=partner&utm_content=github), go to your workspace, and open the `Artifacts` tab. There, you should find three artifacts as follows:
+> - `articles-instruct-dataset`
+> - `posts-instruct-dataset`
+> - `repositories-instruct-dataset`
+

 ### Step 5: Fine-tuning

+For details on setting up the training pipeline on [Qwak](https://www.qwak.com/lp/end-to-end-mlops/?utm_source=github&utm_medium=referral&utm_campaign=decodingml) and running it, please refer to the [TRAINING]() document.

 ### Step 6: Inference

+After you have fine-tuned your model, the next step is to deploy the inference pipeline to Qwak as a REST API service:
+```shell
+make deploy-inference-pipeline
+```
+
+> [!NOTE]
+> You can check out the progress of the deployment on [Qwak](https://www.qwak.com/lp/end-to-end-mlops/?utm_source=github&utm_medium=referral&utm_campaign=decodingml).

+After the deployment is finished (it will take a while), you can call it by running:
+```shell
+make call-inference-pipeline
+```

+Ultimately, after you stop using it, make sure to delete the deployment by running:
+```shell
+make undeploy-inference-pipeline
+```

Makefile

Lines changed: 21 additions & 10 deletions

@@ -48,41 +48,52 @@ local-test-github: # Send test command on local to test the lambda with a Github
	curl -X POST "http://localhost:9010/2015-03-31/functions/function/invocations" \
		-d '{"user": "Paul Iuztin", "link": "https://github.com/decodingml/llm-twin-course"}'

-invoke: # Invoke remote lambda from local
+cloud-test-github: # Send command to the cloud lambda with a Github repository
	aws lambda invoke \
		--function-name crawler \
		--cli-binary-format raw-in-base64-out \
-		--payload '{"user": "Paul Iuztin", "link": "https://github.com/iusztinpaul/hands-on-llms"}' \
+		--payload '{"user": "Paul Iuztin", "link": "https://github.com/decodingml/llm-twin-course"}' \
		response.json

 # ------ RAG Feature Pipeline ------

-local-bytewax: # Run bytewax pipeline
+local-feature-pipeline: # Run the RAG feature pipeline
	RUST_BACKTRACE=full poetry run python -m bytewax.run 3-feature-pipeline/main.py

-generate-dataset: # Generate dataset for finetuning and version it in Comet ML
-	python -m finetuning.generate_data
+local-generate-dataset: # Generate dataset for finetuning and version it in Comet ML
+	docker exec -it llm-twin-bytewax python -m finetuning.generate_data

 # ------ RAG ------

 local-test-retriever: # Test retriever
-	poetry run python retriever.py
+	docker exec -it llm-twin-bytewax python -m retriever

-# ------ Qwak: Fine-tuning & Inference ------
+# ------ Qwak: Training pipeline ------

 create-qwak-project: # Create Qwak project for serving the model
	@echo "$(YELLOW)Creating Qwak project $(RESET)"
	qwak models create "llm_twin" --project "llm-twin-course"

-deploy: # Deploy the model to Qwak
+local-test-training-pipeline: # Test Qwak model locally
+	poetry run python test_local.py
+
+deploy-training-pipeline: # Deploy the model to Qwak
	@echo "$(YELLOW)Dumping poetry env requirements to $(RESET) $(GREEN) requirements.txt $(RESET)"
	poetry export -f requirements.txt --output finetuning/requirements.txt --without-hashes
	@echo "$(GREEN)Triggering Qwak Model Build$(RESET)"
	poetry run qwak models build -f build_config.yaml .

-local-test-qwak: # Test Qwak model locally
-	poetry run python test_local.py

+# ------ Qwak: Inference pipeline ------
+
+deploy-inference-pipeline: # Deploy the inference pipeline to Qwak.
+	poetry run qwak models deploy realtime --model-id "llm_twin" --instance "gpu.a10.2xl" --timeout 50000 --replicas 2 --server-workers 2
+
+undeploy-inference-pipeline: # Remove the inference pipeline deployment from Qwak.
+	poetry run qwak models undeploy --model-id "llm_twin"
+
+call-inference-pipeline: # Call the inference pipeline.
+	poetry run python main.py

 # ------ Superlinked Bonus Series ------
