Skip to content

Commit dbffd1f

Browse files
WIP, pass one at populate-json recipe
Signed-off-by: greg pereira <[email protected]>
1 parent 53af15b commit dbffd1f

File tree

11 files changed

+951
-0
lines changed

11 files changed

+951
-0
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
MODEL_URL ?=
2+
MODEL_NAME ?=
3+
MODEL_DIR ?= models
4+
5+
.PHONY: download-model
6+
# Download $(MODEL_URL) to $(MODEL_DIR)/$(MODEL_NAME).
# curl writes to a ".tmp" file next to the destination; the file is renamed
# into place only when the transfer succeeds. If the download or the rename
# fails, both the temporary file and the destination are removed.
download-model:
	mkdir -p $(MODEL_DIR)
	curl -H "Cache-Control: no-cache" -s -S -L -f $(MODEL_URL) -z $(MODEL_DIR)/$(MODEL_NAME) -o $(MODEL_DIR)/$(MODEL_NAME).tmp && \
	mv -f $(MODEL_DIR)/$(MODEL_NAME).tmp $(MODEL_DIR)/$(MODEL_NAME) 2>/dev/null || \
	rm -f $(MODEL_DIR)/$(MODEL_NAME).tmp $(MODEL_DIR)/$(MODEL_NAME)
10+
11+
.PHONY: download-model-mistral # default model
12+
# Fetch the default Mistral 7B Instruct GGUF model by re-invoking the generic
# download-model target. $(MAKE) is used instead of a literal "make" so that
# command-line flags (-n, -j, ...) propagate to the sub-make.
download-model-mistral:
	MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf $(MAKE) -f Makefile download-model
14+
15+
.PHONY: download-default-json-grammar
16+
# Download llama.cpp's reference JSON grammar into grammars/json.gbnf.
# -f makes curl fail on HTTP errors instead of saving the error page, and the
# file is only moved into place after a successful transfer.
download-default-json-grammar:
	mkdir -p grammars
	curl -fsSL -o grammars/json.gbnf.tmp https://raw.githubusercontent.com/ggerganov/llama.cpp/master/grammars/json.gbnf && \
	mv -f grammars/json.gbnf.tmp grammars/json.gbnf
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Steps:
2+
3+
1. begin local dev
4+
2. Work on datasources
5+
- connect with SRE teams to figure out ways we could get SRE tickets normalized into a training dataset easily ingested by the model
6+
- scrape stackoverflow, stackexchange, and medium for training data
7+
3. deploy with langserve
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
aiohttp==3.9.5
2+
aiosignal==1.3.1
3+
annotated-types==0.6.0
4+
anyio==4.3.0
5+
attrs==23.2.0
6+
certifi==2024.2.2
7+
charset-normalizer==3.3.2
8+
dataclasses-json==0.6.4
9+
fastapi==0.110.2
10+
frozenlist==1.4.1
11+
h11==0.14.0
12+
httpcore==1.0.5
13+
httpx==0.27.0
14+
idna==3.7
15+
jsonpatch==1.33
16+
jsonpointer==2.4
17+
langchain==0.1.16
18+
langchain-community==0.0.34
19+
langchain-core==0.1.45
20+
langchain-text-splitters==0.0.1
21+
langserve==0.1.0
22+
langsmith==0.1.49
23+
marshmallow==3.21.1
24+
multidict==6.0.5
25+
mypy-extensions==1.0.0
26+
numpy==1.26.4
27+
orjson==3.10.1
28+
packaging==23.2
29+
pathlib==1.0.1
30+
pydantic==2.7.0
31+
pydantic_core==2.18.1
32+
PyYAML==6.0.1
33+
requests==2.31.0
34+
sniffio==1.3.1
35+
SQLAlchemy==2.0.29
36+
starlette==0.37.2
37+
tenacity==8.2.3
38+
typing-inspect==0.9.0
39+
typing_extensions==4.11.0
40+
urllib3==2.2.1
41+
validators==0.28.1
42+
yarl==1.9.4
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from langchain_community.llms import LlamaCpp
2+
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
3+
from langchain_core.prompts.prompt import PromptTemplate
4+
from langchain.schema.runnable import Runnable
5+
import json
6+
from pprint import pprint
7+
from pathlib import Path
8+
# from langchain_text_splitters import RecursiveJsonSplitter
9+
10+
import os

# Location of the GGUF model file. The default is the original
# developer-local path; set the MODEL_PATH environment variable to use a
# model stored elsewhere without editing this file.
model_path = os.environ.get(
    "MODEL_PATH",
    "/Users/gregpereirapereira/Documents/tech/work/red-hat/code/containers/ai-lab-recipes/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
)
n_gpu_layers = -1  # This has been compiled with METAL framework all GPU for mac ARM64
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU. Using default

# Streams generated tokens to stdout as the model produces them.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
15+
16+
def no_download_json_chain(file_name: str, input: str) -> Runnable:
    """Build a runnable that asks the model to fill a JSON schema from text.

    The schema is read from ``schemas/<file_name>`` (relative to the current
    working directory), serialised to a JSON string, and bound into the prompt
    together with the user's text.

    Args:
        file_name: Name of the schema file inside the ``schemas`` directory.
        input: Free-form user text the model should extract values from.

    Returns:
        A ``prompt | model`` runnable. Both prompt variables are already
        bound, so the chain is invoked with an empty mapping:
        ``chain.invoke({})``.

    Raises:
        FileNotFoundError: If the schema file does not exist.
        json.JSONDecodeError: If the schema file is not valid JSON.
    """
    schema_path = Path("schemas") / file_name
    json_schema = json.loads(schema_path.read_text())
    json_schema_string = json.dumps(json_schema)
    # NOTE: this is a character count, used as a rough proxy for tokens.
    print("schema tokens: ", len(json_schema_string))
    # dropping chunk splitting --> moving to a model with bigger token input
    # splitter = RecursiveJsonSplitter(max_chunk_size=300)
    # json_chunks = splitter.split_json(json_data=json_data)
    # NOTE(review): the prompt wording is kept byte-for-byte from the original.
    template = (
        "\n"
        "The user a JSON schema, and some text. Return to me a JSON object based on schema and by selecting the appropriate selections of the user text.\n"
        "%JSON schema\n"
        "{json_schema}\n"
        "%User input:\n"
        "{input}\n"
    )
    # Bind the values as partial variables instead of str.format()-ing them
    # into the template: the result stays a Runnable prompt, and the braces in
    # the JSON schema are never re-parsed as template placeholders. The JSON
    # string (not the Python dict repr) is used so the model sees valid JSON.
    prompt = PromptTemplate.from_template(template).partial(
        json_schema=json_schema_string,
        input=input,
    )
    print("Token usage: ", len(prompt.format()))
    model = LlamaCpp(
        model_path=model_path,
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        echo=True,
        callback_manager=callback_manager,
        verbose=True,
        max_tokens=4000,
        temperature=0.05,
    )
    # The prompt must feed the model; piping a Runnable into a plain string
    # (the original ``model | template``) raises TypeError.
    return prompt | model
45+
46+
# Examples for when running as a non langserve route / non Runnable
47+
# no_download_json_chain("fruit.json", "A red banana.")
48+
# no_download_json_chain("employee.json", "My name is Gregory Pereira. I work in the Emerging Technologies department and the Platform and Services team. I like apples.")
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"title": "Car",
4+
"type": "object",
5+
"properties": {
6+
"make": {
7+
"type": "string",
8+
"description": "The make or manufacturer of the car"
9+
},
10+
"model": {
11+
"type": "string",
12+
"description": "The model of the car"
13+
},
14+
"year": {
15+
"type": "integer",
16+
"minimum": 1900,
17+
"maximum": 2024,
18+
"description": "The manufacturing year of the car"
19+
},
20+
"color": {
21+
"type": "string",
22+
"description": "The color of the car"
23+
},
24+
"mileage": {
25+
"type": "number",
26+
"minimum": 0,
27+
"description": "The mileage of the car in kilometers"
28+
},
29+
"price": {
30+
"type": "number",
31+
"minimum": 0,
32+
"description": "The price of the car in USD"
33+
}
34+
},
35+
"required": ["make", "model", "year", "color", "mileage", "price"]
36+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"title": "Computer",
4+
"type": "object",
5+
"properties": {
6+
"brand": {
7+
"type": "string",
8+
"description": "The brand or manufacturer of the computer"
9+
},
10+
"model": {
11+
"type": "string",
12+
"description": "The model of the computer"
13+
},
14+
"processor": {
15+
"type": "object",
16+
"description": "Details about the processor of the computer",
17+
"properties": {
18+
"manufacturer": {
19+
"type": "string",
20+
"description": "The manufacturer of the processor"
21+
},
22+
"model": {
23+
"type": "string",
24+
"description": "The model of the processor"
25+
},
26+
"cores": {
27+
"type": "integer",
28+
"minimum": 1,
29+
"description": "The number of processor cores"
30+
},
31+
"clock_speed": {
32+
"type": "number",
33+
"minimum": 0,
34+
"description": "The clock speed of the processor in GHz"
35+
}
36+
},
37+
"required": ["manufacturer", "model", "cores", "clock_speed"]
38+
},
39+
"ram": {
40+
"type": "object",
41+
"description": "Details about the RAM of the computer",
42+
"properties": {
43+
"size_gb": {
44+
"type": "number",
45+
"minimum": 0,
46+
"description": "The size of RAM in gigabytes"
47+
},
48+
"type": {
49+
"type": "string",
50+
"description": "The type of RAM (e.g., DDR4)"
51+
}
52+
},
53+
"required": ["size_gb", "type"]
54+
},
55+
"storage": {
56+
"type": "object",
57+
"description": "Details about the storage of the computer",
58+
"properties": {
59+
"type": {
60+
"type": "string",
61+
"description": "The type of storage (e.g., SSD, HDD)"
62+
},
63+
"capacity_gb": {
64+
"type": "number",
65+
"minimum": 0,
66+
"description": "The capacity of storage in gigabytes"
67+
}
68+
},
69+
"required": ["type", "capacity_gb"]
70+
},
71+
"price": {
72+
"type": "number",
73+
"minimum": 0,
74+
"description": "The price of the computer in USD"
75+
}
76+
},
77+
"required": ["brand", "model", "processor", "ram", "storage", "price"]
78+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$id": "https://github.com/containers/ai-lab-recipes/recipes/json-to-text/employee.schema.json",
4+
"title": "employee",
5+
"description": "Acme's Employee Information",
6+
"type": "object",
7+
"properties": {
8+
"name": {
9+
"description": "The employee's full name",
10+
"type": "string"
11+
},
12+
"employeeId": {
13+
"description": "The unique identifier for an employee",
14+
"type": "integer"
15+
},
16+
"title": {
17+
"description": "An Identifier for what position the employee holds within the company",
18+
"type": "string"
19+
},
20+
"manager": {
21+
"description": "Who sits above the employee in the Org chart and is responsible for managing them.",
22+
"type": "object",
23+
"items": {
24+
"$ref": "#"
25+
}
26+
},
27+
"teams": {
28+
"description": "Which products, services or other initiatives is this employee responsible for contributing to.",
29+
"type": "array",
30+
"items": {
31+
"type": "object",
32+
"properties": {
33+
"teamName": {
34+
"description": "A name used to refer to and distinguish between teams.",
35+
"type": "string"
36+
},
37+
"teamId": {
38+
"description": "A unique integer used to identify a team.",
39+
"type": "integer"
40+
},
41+
"leader": {
42+
"description": "The one who is responsible for guiding the team.",
43+
"items": {
44+
"$ref": "#"
45+
}
46+
},
47+
"description": {
48+
"description": "A short blurb giving information on the team.",
49+
"type": "string"
50+
}
51+
},
52+
"required": ["teamName", "teamId"]
53+
},
54+
"minItems": 1,
55+
"uniqueItems": true
56+
}
57+
},
58+
"required": ["employeeId", "title", "teams"]
59+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"title": "Fruit",
4+
"type": "object",
5+
"properties": {
6+
"name": {
7+
"type": "string",
8+
"description": "The name of the fruit"
9+
},
10+
"color": {
11+
"type": "string",
12+
"description": "The color of the fruit"
13+
}
14+
},
15+
"required": ["name", "color"]
16+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from fastapi import FastAPI
2+
from langserve import add_routes
3+
4+
from __main__ import no_download_json_chain
5+
6+
7+
# Create the FastAPI application that hosts the LangServe route(s).
app=FastAPI(
8+
title="Langchain Server",
9+
version="1.0",
10+
description="A simple API Server"
11+
)
12+
13+
# Register the chain on the app (no explicit path is passed to add_routes).
# NOTE(review): the no_download_json_chain definition in this change declares
# two required parameters (file_name, input); calling it with no arguments
# would raise TypeError at import time -- confirm the intended arguments.
# NOTE(review): the function is imported from `__main__`, which is this file
# when it is run as a script -- confirm the module that actually defines it.
add_routes(app, no_download_json_chain())
14+
15+
# Start the server only when this file is executed directly.
if __name__ == "__main__":
16+
import uvicorn
17+
18+
# Serve on all interfaces (0.0.0.0), port 8001.
uvicorn.run(app, host="0.0.0.0", port=8001)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import validators.url
2+
from urllib.error import URLError, HTTPError
3+
import urllib.request
4+
5+
def download_json_file(url: str, file_name: str) -> bool:
    """Download *url* and save the response body to *file_name*.

    Failures are reported on stdout rather than raised, preserving the
    original best-effort behaviour, but the outcome is now returned so that
    callers can tell whether the file was actually written.

    Args:
        url: Address of the file to fetch; it must pass ``validators.url``.
        file_name: Destination path handed to ``urllib.request.urlretrieve``.

    Returns:
        ``True`` if the file was retrieved, ``False`` if the URL failed
        validation or the download raised an error.
    """
    # Guard clause: previously an invalid URL was skipped without any output.
    if not validators.url(url):
        print(f"Invalid URL: {url}")
        return False

    try:
        urllib.request.urlretrieve(url, file_name)
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print(f"HTTP Error: {e.code}, {e.reason}")
        return False
    except URLError as e:
        print(f"URL Error: {e.reason}")
        return False
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False
    return True

0 commit comments

Comments
 (0)