Commit e0f268d

docs: Add end-to-end docs. Fix small bugs
1 parent: 1ce2297

File tree: 13 files changed (+81 −43 lines)

3-feature-pipeline/db.py

Lines changed: 2 additions & 2 deletions

@@ -62,8 +62,8 @@ def search(
         self,
         collection_name: str,
         query_vector: list,
-        query_filter: models.Filter,
-        limit: int,
+        query_filter: models.Filter | None = None,
+        limit: int = 3,
     ) -> list:
         return self._instance.search(
             collection_name=collection_name,
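
With the filter optional and the limit defaulted, callers can now run a plain semantic search without building a metadata filter first. A minimal usage sketch, assuming the connector can be constructed with no arguments and using a hypothetical collection name and embedding (neither appears in this commit):

```python
from db import QdrantDatabaseConnector

client = QdrantDatabaseConnector()  # assumed default construction

# Hypothetical query embedding produced upstream by the embedding model.
query_vector = [0.12, -0.03, 0.44]

# No query_filter and no explicit limit: the new defaults (None and 3) apply.
hits = client.search(collection_name="vector_posts", query_vector=query_vector)

# A filtered search still works as before by passing a models.Filter explicitly.
```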

3-feature-pipeline/finetuning/generate_data.py

Lines changed: 2 additions & 2 deletions

@@ -4,7 +4,7 @@
 from comet_ml import Artifact, Experiment

 from utils.logging import get_logger
-from db.qdrant import QdrantDatabaseConnector
+from db import QdrantDatabaseConnector
 from finetuning.file_handler import FileHandler
 from finetuning.llm_communication import GptCommunicator
 from config import settings

@@ -126,7 +126,7 @@ def fetch_all_cleaned_content(self, collection_name: str) -> list:
    data_formatter = DataFormatter()
    dataset_generator = DatasetGenerator(file_handler, api_communicator, data_formatter)

-   collections = [("cleaned_articles", "articles"), ("cleaned_posts", "posts")]
+   collections = [("cleaned_articles", "articles"), ("cleaned_posts", "posts"), ("cleaned_repositories", "repositories")]
    for (collection_name, data_type) in collections:
        logger.info("Generating training data.", collection_name=collection_name, data_type=data_type)
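
Each `(collection_name, data_type)` pair ends up as a versioned dataset in Comet ML. A rough sketch of that mapping, using a hypothetical helper (the artifact names match the ones listed in INSTALL_AND_USAGE.md below; the actual code in `generate_data.py` may do this differently):

```python
from comet_ml import Artifact, Experiment


def push_instruct_dataset(experiment: Experiment, data_type: str, file_path: str) -> None:
    # Hypothetical helper: wraps the generated JSON file in a Comet ML artifact
    # named after the data type, e.g. "repositories-instruct-dataset".
    artifact = Artifact(name=f"{data_type}-instruct-dataset", artifact_type="dataset")
    artifact.add(file_path)
    experiment.log_artifact(artifact)
```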

3-feature-pipeline/finetuning/llm_communication.py

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 from openai import OpenAI

 from utils.logging import get_logger
-from ..config import settings
+from config import settings

 MAX_LENGTH = 16384
 SYSTEM_PROMPT = (

3-feature-pipeline/llm/prompt_templates.py

Lines changed: 1 addition & 0 deletions

@@ -37,6 +37,7 @@ class SelfQueryTemplate(BasePromptTemplate):
     prompt: str = """You are an AI language model assistant. Your task is to extract information from a user question.
     The required information that needs to be extracted is the user or author id.
     Your response should consists of only the extracted id (e.g. 1345256), nothing else.
+    If you cannot find the author id, return the string "None".
     User question: {question}"""

     def create_template(self) -> PromptTemplate:

3-feature-pipeline/rag/retriever.py

Lines changed: 12 additions & 9 deletions

@@ -2,7 +2,7 @@

 from utils.logging import get_logger
 import utils
-from db.qdrant import QdrantDatabaseConnector
+from db import QdrantDatabaseConnector
 from qdrant_client import models
 from rag.query_expanison import QueryExpansion
 from rag.reranking import Reranker

@@ -27,7 +27,7 @@ def __init__(self, query: str):
         self._reranker = Reranker()

     def _search_single_query(
-        self, generated_query: str, metadata_filter_value: str, k: int
+        self, generated_query: str, metadata_filter_value: str | None, k: int
     ):
         assert k > 3, "k should be greater than 3"

@@ -44,7 +44,7 @@ def _search_single_query(
                             ),
                         )
                     ]
-                ),
+                ) if metadata_filter_value else None,
                 query_vector=query_vector,
                 limit=k // 3,
             ),

@@ -59,7 +59,7 @@ def _search_single_query(
                             ),
                         )
                     ]
-                ),
+                ) if metadata_filter_value else None,
                 query_vector=query_vector,
                 limit=k // 3,
             ),

@@ -74,7 +74,7 @@ def _search_single_query(
                             ),
                         )
                     ]
-                ),
+                ) if metadata_filter_value else None,
                 query_vector=query_vector,
                 limit=k // 3,
             ),

@@ -92,10 +92,13 @@ def retrieve_top_k(self, k: int, to_expand_to_n_queries: int) -> list:
         )

         author_id = self._metadata_extractor.generate_response(self.query)
-        logger.info(
-            "Successfully extracted the author_id from the query.",
-            author_id=author_id,
-        )
+        if author_id:
+            logger.info(
+                "Successfully extracted the author_id from the query.",
+                author_id=author_id,
+            )
+        else:
+            logger.info("Couldn't extract the author_id from the query.")

         with concurrent.futures.ThreadPoolExecutor() as executor:
             search_tasks = [
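
The recurring `models.Filter(...) if metadata_filter_value else None` pattern can be read as a small standalone helper. A sketch under the assumption that the metadata key is `author_id` (the key name is not visible in these hunks); the `qdrant_client` filter construction mirrors the calls above:

```python
from qdrant_client import models


def build_author_filter(author_id: str | None) -> models.Filter | None:
    # Only constrain the vector search when the self-query step found an id;
    # otherwise return None so the search runs unfiltered.
    if author_id is None:
        return None

    return models.Filter(
        must=[
            models.FieldCondition(
                key="author_id",
                match=models.MatchValue(value=author_id),
            )
        ]
    )
```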

3-feature-pipeline/rag/self_query.py

Lines changed: 5 additions & 2 deletions

@@ -6,7 +6,7 @@

 class SelfQuery:
     @staticmethod
-    def generate_response(query: str) -> str:
+    def generate_response(query: str) -> str | None:
         prompt = SelfQueryTemplate().create_template()
         model = ChatOpenAI(model=settings.OPENAI_MODEL_ID, temperature=0)

@@ -15,6 +15,9 @@ def generate_response(query: str) -> str:
         )

         response = chain.invoke({"question": query})
-        result = response["metadata_filter_value"]
+        result = response.get("metadata_filter_value", "none")
+
+        if result.lower() == "none":
+            return None

         return result
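
A small usage sketch of the patched contract (the example question is illustrative only):

```python
# Returns the extracted id as a string, or None when the LLM answers "None".
author_id = SelfQuery.generate_response("My author_id is 1345256. Draft a post about RAG.")

if author_id is None:
    # No id found: retriever.py now falls back to an unfiltered vector search.
    print("No author_id found in the query.")
else:
    print(f"Filtering retrieval by author_id={author_id}")
```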

3-feature-pipeline/retriever.py

Lines changed: 0 additions & 2 deletions

@@ -11,8 +11,6 @@
 if __name__ == "__main__":
     load_dotenv()
     query = """
-    Hello my author_id is 1.
-
     Could you please draft a LinkedIn post discussing RAG systems?
     I'm particularly interested in how RAG works and how it is integrated with vector DBs and large language models (LLMs).
     """
4-finetuning/README.md

Lines changed: 2 additions & 2 deletions

@@ -223,9 +223,9 @@ verbose: 0
 The project includes a `Makefile` for easy management of common tasks. Here are the main commands you can use:

 - `make help`: Displays help for each make command.
-- `make test`: Runs tests on local-qwak deployment.
+- `make local-test-inference-pipeline`: Runs tests on local-qwak deployment.
 - `make create-qwak-project`: Create a Qwak project to deploy the model.
-- `make deploy`: Triggers a new fine-tuning job to Qwak remotely, using the configuration specified in `build_config.yaml`
+- `make deploy-inference-pipeline`: Triggers a new fine-tuning job to Qwak remotely, using the configuration specified in `build_config.yaml`

 ------

INSTALL_AND_USAGE.md

Lines changed: 33 additions & 0 deletions

@@ -100,14 +100,47 @@ To check that the Qdrant `vector DB` is populated successfully, go to its dashbo
 ### Step 3: RAG retrieval step

 Now that we have some data in our vector DB, let's test out the RAG retriever:
+```shell
+make local-test-retriever
+```
+
+> [!NOTE]
+> Before running this command, check [Qdrant's dashboard](localhost:6333/dashboard) to ensure that your vector DB is populated with data.
+

 ### Step 4: Generate the instruct dataset

+The last step before fine-tuning is to generate an instruct dataset and track it as an artifact in Comet ML. To do so, run:
+```shell
+make local-generate-dataset
+```
+
+> Now open [Comet ML](https://www.comet.com/signup/?utm_source=decoding_ml&utm_medium=partner&utm_content=github), go to your workspace, and open the `Artifacts` tab. There, you should find three artifacts as follows:
+> - `articles-instruct-dataset`
+> - `posts-instruct-dataset`
+> - `repositories-instruct-dataset`
+

 ### Step 5: Fine-tuning

+For details on setting up the training pipeline on [Qwak](https://www.qwak.com/lp/end-to-end-mlops/?utm_source=github&utm_medium=referral&utm_campaign=decodingml) and running it, please refer to the [TRAINING]() document.

 ### Step 6: Inference

+After you have fine-tuned your model, the next step is to deploy the inference pipeline to Qwak as a REST API service:
+```shell
+make deploy-inference-pipeline
+```
+
+> [!NOTE]
+> You can check out the progress of the deployment on [Qwak](https://www.qwak.com/lp/end-to-end-mlops/?utm_source=github&utm_medium=referral&utm_campaign=decodingml).

+After the deployment is finished (it will take a while), you can call it by running:
+```shell
+make call-inference-pipeline
+```

+Ultimately, after you stop using it, make sure to delete the deployment by running:
+```shell
+make undeploy-inference-pipeline
+```

Makefile

Lines changed: 21 additions & 10 deletions

@@ -48,41 +48,52 @@ local-test-github: # Send test command on local to test the lambda with a Github
	curl -X POST "http://localhost:9010/2015-03-31/functions/function/invocations" \
		-d '{"user": "Paul Iuztin", "link": "https://github.com/decodingml/llm-twin-course"}'

-invoke: # Invoke remote lambda from local
+cloud-test-github: # Send command to the cloud lambda with a Github repository
	aws lambda invoke \
		--function-name crawler \
		--cli-binary-format raw-in-base64-out \
-		--payload '{"user": "Paul Iuztin", "link": "https://github.com/iusztinpaul/hands-on-llms"}' \
+		--payload '{"user": "Paul Iuztin", "link": "https://github.com/decodingml/llm-twin-course"}' \
		response.json

 # ------ RAG Feature Pipeline ------

-local-bytewax: # Run bytewax pipeline
+local-feature-pipeline: # Run the RAG feature pipeline
	RUST_BACKTRACE=full poetry run python -m bytewax.run 3-feature-pipeline/main.py

-generate-dataset: # Generate dataset for finetuning and version it in Comet ML
-	python -m finetuning.generate_data
+local-generate-dataset: # Generate dataset for finetuning and version it in Comet ML
+	docker exec -it llm-twin-bytewax python -m finetuning.generate_data

 # ------ RAG ------

 local-test-retriever: # Test retriever
-	poetry run python retriever.py
+	docker exec -it llm-twin-bytewax python -m retriever

-# ------ Qwak: Fine-tuning & Inference ------
+# ------ Qwak: Training pipeline ------

 create-qwak-project: # Create Qwak project for serving the model
	@echo "$(YELLOW)Creating Qwak project $(RESET)"
	qwak models create "llm_twin" --project "llm-twin-course"

-deploy: # Deploy the model to Qwak
+local-test-training-pipeline: # Test Qwak model locally
+	poetry run python test_local.py
+
+deploy-training-pipeline: # Deploy the model to Qwak
	@echo "$(YELLOW)Dumping poetry env requirements to $(RESET) $(GREEN) requirements.txt $(RESET)"
	poetry export -f requirements.txt --output finetuning/requirements.txt --without-hashes
	@echo "$(GREEN)Triggering Qwak Model Build$(RESET)"
	poetry run qwak models build -f build_config.yaml .

-local-test-qwak: # Test Qwak model locally
-	poetry run python test_local.py

+# ------ Qwak: Inference pipeline ------
+
+deploy-inference-pipeline: # Deploy the inference pipeline to Qwak.
+	poetry run qwak models deploy realtime --model-id "llm_twin" --instance "gpu.a10.2xl" --timeout 50000 --replicas 2 --server-workers 2
+
+undeploy-inference-pipeline: # Remove the inference pipeline deployment from Qwak.
+	poetry run qwak models undeploy --model-id "llm_twin"
+
+call-inference-pipeline: # Call the inference pipeline.
+	poetry run python main.py

 # ------ Superlinked Bonus Series ------
