Skip to content

Commit

Permalink
async code improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
micsthepick committed Jul 2, 2024
1 parent 4f6faf3 commit 75e1027
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 90 deletions.
176 changes: 86 additions & 90 deletions llamacppasync.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,19 @@
import json
import sys
from pathlib import Path
from tqdm import tqdm
from math import log
from tqdm.asyncio import tqdm


# Configuration and Constants
HUNKSIZE = 4000
BATCHSIZE = 16
HUNKSIZE = 3696
BATCHSIZE = 128
# used model ctx size should be related to the above with the following eqn:
# CTXSIZE = BATCHSIZE*(HUNKSIZE/4+400/4), or alternatively HUNKSIZE = 4*CTXSIZE/BATCHSIZE-400
# BATCHSIZE = 16 and CTXSIZE = 65536 (HUNKSIZE = 15984) works best from experimentation on my 24GB 3090, (need to compress kv cache)
# with model Phi-3-mini-128k-instruct
# (BATCHSIZE = 16, CTXSIZE = 32768 (max), HUNKSIZE = 7792) with HelloBible

testing_key = 'Password12344321'
AUTH = os.getenv("OPENAI_AI_KEY", testing_key)
testing_api = "http://127.0.0.1:8080"
Expand Down Expand Up @@ -59,7 +66,7 @@ async def get_books(books=None, path="Bible-kjv"):
""" Generator to yield book name and its chapters. """
if not books:
books = ALL_BOOKS
for book in tqdm(books, 'books', leave=False):
for book in books:
file_path = Path(path).joinpath(f"{book.replace(' ', '')}.json")
try:
async with aiofiles.open(file_path, 'r') as file:
Expand All @@ -72,14 +79,12 @@ async def get_books(books=None, path="Bible-kjv"):
continue
yield book, get_chapters(book_object)

async def get_data(question, hunk):
return f"""<|user|>
Determine whether the Bible text is applicable for answering the provided question:
def get_data(question, hunk):
return f"""Determine whether the Bible text is applicable for answering the provided question:
QUESTION: {question}
TEXT: {hunk}
(Your Answer Must be 'yes' or 'no' without quotes)<|end|>
<|assistant|>
"""
ANSWER:"""

def get_score(value):
""" Convert raw score to a human-readable score. """
Expand All @@ -99,77 +104,78 @@ async def generate_tasks(question, book_filter):
if hunk:
yield (question, hunk, book, hunk_start, (chapter, verse))

async def process_batch(session, values, yes_token_id, no_token_id):
async def process(session, question, hunk, book, chapter_start, verse_start, chapter_end, verse_end, yes_token_id, no_token_id):
""" Send request to the API and get the response. """
data = {
"prompt": [await get_data(*value[:2]) for value in values],
"prompt": get_data(question, hunk),
"temperature": -1,
"n_predict": 1,
"logit_bias": [[i,False] for i in range(256*256) if i not in [yes_token_id, no_token_id]],
"n_probs": 2,
"add_bos_token": True,
"samplers": []
}

async with session.post(URL, headers=headers, json=data, ssl=False) as response:
response_json = await response.json()

if not isinstance(response_json, dict) or 'err' in response_json or 'error' in response_json:
tqdm.write(str(response_json))
print(str(response_json))
return {
"score": -999,
"error": response_json
}

retvals = []
for response, (question, contents_string, book, (chapter_start, verse_start), (chapter_end, verse_end)) in zip(response_json.get('results', []), values):
resp_completions = response.get("completion_probabilities", [{}])[0].get("probs", [])
if not resp_completions:
return {
"score": -999,
"error": "completion_probabilities not found"
}

resp_completions = response_json.get("completion_probabilities", [{}])[0].get("probs", [])
if not resp_completions:
return {
"score": -999,
"error": "completion_probabilities not found"
}

yes_raw = 0
no_raw = 0

for tok in resp_completions:
if not isinstance(tok, dict):
break
if tok.get("tok_str", "").strip() == yes_token.strip():
yes_raw = tok.get('prob', 0)
elif tok.get("tok_str", "").strip() == no_token.strip():
no_raw = tok.get('prob', 0)

if yes_raw is None:
yes_raw = 0
if no_raw is None:
no_raw = 0

for tok in resp_completions:
if not isinstance(tok, dict):
break
if tok.get("tok_str", "").strip() == yes_token.strip():
yes_raw = tok.get('prob', 0)
elif tok.get("tok_str", "").strip() == no_token.strip():
no_raw = tok.get('prob', 0)

if yes_raw is None:
yes_raw = 0
if no_raw is None:
no_raw = 0

if (yes_raw + no_raw) == 0:
score = 0
else:
score = yes_raw / (yes_raw + no_raw)

contents_string = f"{book} {chapter_start}:{verse_start}"
if chapter_start != chapter_end:
contents_string += f"-{chapter_end}:{verse_end}"
elif verse_start != verse_end:
contents_string += f"-{verse_end}"

retvals.append({
"score": score,
"question": question,
"book": book,
"ref": contents_string,
"chapter_start": chapter_start,
"verse_start": verse_start,
"chapter_end": chapter_end,
"verse_end": verse_end
})
return retvals
if (yes_raw + no_raw) == 0:
score = 0
else:
score = yes_raw / (yes_raw + no_raw)

contents_string = f"{book} {chapter_start}:{verse_start}"
if chapter_start != chapter_end:
contents_string += f"-{chapter_end}:{verse_end}"
elif verse_start != verse_end:
contents_string += f"-{verse_end}"

return {
"score": score,
"question": question,
"book": book,
"ref": contents_string,
"chapter_start": chapter_start,
"verse_start": verse_start,
"chapter_end": chapter_end,
"verse_end": verse_end
}

async def main():
semaphore = asyncio.Semaphore(BATCHSIZE)

async def process_considering_batchsize(*args):
async with semaphore:
return process(*args)

book_filter = None
if len(sys.argv) > 1:
book_filter = sys.argv[1:]
Expand All @@ -179,34 +185,30 @@ async def main():
while True:
question = input('Search Query (e.g. question or biblical statement): ')

base_len = len(get_data(question, ""))

if base_len > 400:
if input(f'{base_len-400} chars over limit! continue anyways? [Y/n]')[0].lower() == 'n':
continue

async with aiohttp.ClientSession() as session:
if (yes_token_id is None):
yes_token_id = await get_tok(session, yes_token)
if (no_token_id is None):
no_token_id = await get_tok(session, no_token)
scores = []
task_gen = generate_tasks(question, book_filter)
iterating = True
while iterating:
tasks = []
for _ in range(BATCHSIZE):
try:
val = await task_gen.__anext__()
tasks.append(val)
except StopAsyncIteration:
iterating = False
break
scores += await process_batch(session, tasks, yes_token_id, no_token_id)
tasks = []
async for question, hunk, book, (chapter_start, verse_start), (chapter_end, verse_end) in task_gen:
tasks.append(await process_considering_batchsize(session, question, hunk, book, chapter_start, verse_start, chapter_end, verse_end, yes_token_id, no_token_id))
scores = await tqdm.gather(*tasks, 'finding best hunks', leave=False)
n = 7
tqdm.write(f'Scores accumulated. Best {n} hunks to follow')
print(f'Scores accumulated. Best {n} hunks to follow')
best = sorted(scores, key=lambda x:-x['score'])[:n]
tqdm.write('\n'.join([f"{get_score(obj['score'])}: {obj['ref']}" for obj in best]))
print('\n'.join([f"{get_score(obj['score'])}: {obj['ref']}" for obj in best]))
for selection in best:
tqdm.write(f"Selecting hunk: {selection['ref']}")
specific_scores = []
print(f"Selecting hunk: {selection['ref']}")
texts = []
batch = []
batchi = 0
tasks = []
async for book, book_contents in get_books([selection['book']]):
async for chapter, chapter_contents in book_contents:
if chapter < selection['chapter_start']:
Expand All @@ -219,34 +221,28 @@ async def main():
elif chapter == selection['chapter_end'] and verse > selection['verse_end']:
break
texts.append(verse_text)
batch.append((question, verse_text, book, (chapter, verse), (chapter, verse)))
batchi += 1
if batchi >= BATCHSIZE:
specific_scores += await process_batch(session, batch, yes_token_id, no_token_id)
batch = []
batchi = 0
if batch:
specific_scores += await process_batch(session, batch, yes_token_id, no_token_id)
batch = []
tasks.append(await process_considering_batchsize(session, question, hunk, book, chapter_start, verse_start, chapter_end, verse_end, yes_token_id, no_token_id))
await asyncio.sleep(0.2)
specific_scores = await tqdm.gather(*tasks, 'finding best verses', leave=False)
nv = 3
top_indexes = sorted(range(len(specific_scores)), key=lambda x: specific_scores[x]['score'], reverse=True)[:nv]
tqdm.write(f"Best {nv} verses from hunk in {selection['book']}:")
print(f"Best {nv} verses from hunk in {selection['book']}:")
for i in top_indexes:
text = texts[i]
obj = specific_scores[i]
score = get_score(obj['score'])
ref = obj['ref']
tqdm.write(f' Score: {score}, Reference: {ref};')
tqdm.write(' ' + ' '.join(text.split('\n')))
print(f' Score: {score}, Reference: {ref};')
print(' ' + ' '.join(text.split('\n')))


try:
ALL_BOOKS = asyncio.run(load_books())
except FileNotFoundError:
tqdm.write(f"Error: The file {file_path} does not exist.")
print(f"Error: The file {file_path} does not exist.")
sys.exit(1)
except json.JSONDecodeError:
tqdm.write(f"Error: The file {file_path} is not a valid JSON file.")
print(f"Error: The file {file_path} is not a valid JSON file.")
sys.exit(1)

if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions start_hellobible.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 16 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
1 change: 1 addition & 0 deletions start_phi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
./llama-server --no-mmap -ngl 99 -fa -ctk q4_0 -ctv q4_0 -c 131072 -np 64 -t 6 -m models/ggml-model-Q4_0.gguf --api-key Password12344321

0 comments on commit 75e1027

Please sign in to comment.