Skip to content

Commit

Permalink
optimize for hellobible
Browse files Browse the repository at this point in the history
  • Loading branch information
micsthepick committed Jul 2, 2024
1 parent 75e1027 commit 477c694
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
19 changes: 9 additions & 10 deletions llamacppasync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,10 @@

# Configuration and Constants
HUNKSIZE = 3696
BATCHSIZE = 128
BATCHSIZE = 32
# The context size the model is run with should be related to the constants above by:
# CTXSIZE = BATCHSIZE*(HUNKSIZE/4 + 400/4), or equivalently HUNKSIZE = 4*CTXSIZE/BATCHSIZE - 400
# BATCHSIZE = 16 and CTXSIZE = 65536 (HUNKSIZE = 15984) works best from experimentation on my 24GB 3090, (need to compress kv cache)
# with model Phi-3-mini-128k-instruct
# (BATCHSIZE = 16, CTXSIZE = 32768 (max), HUNKSIZE = 7792) with HelloBible
# (BATCHSIZE = 32, CTXSIZE = 32768 (max), HUNKSIZE = 3696) with HelloBible works well on my RTX 3090 with 24GB VRAM

testing_key = 'Password12344321'
AUTH = os.getenv("OPENAI_AI_KEY", testing_key)
Expand Down Expand Up @@ -80,11 +78,11 @@ async def get_books(books=None, path="Bible-kjv"):
yield book, get_chapters(book_object)

def get_data(question, hunk):
return f"""Determine whether the Bible text is applicable for answering the provided question:
return f"""[INST]Determine whether the Bible text is applicable for answering the provided question:
QUESTION: {question}
TEXT: {hunk}
(Your Answer Must be 'yes' or 'no' without quotes)<|end|>
ANSWER:"""
(Your Answer Must be 'yes' or 'no' without quotes)[/INST]
Answer:"""

def get_score(value):
""" Convert raw score to a human-readable score. """
Expand Down Expand Up @@ -200,7 +198,7 @@ async def process_considering_batchsize(*args):
tasks = []
async for question, hunk, book, (chapter_start, verse_start), (chapter_end, verse_end) in task_gen:
tasks.append(await process_considering_batchsize(session, question, hunk, book, chapter_start, verse_start, chapter_end, verse_end, yes_token_id, no_token_id))
scores = await tqdm.gather(*tasks, 'finding best hunks', leave=False)
scores = await tqdm.gather(*tasks, desc='finding best hunks', leave=False)
n = 7
print(f'Scores accumulated. Best {n} hunks to follow')
best = sorted(scores, key=lambda x:-x['score'])[:n]
Expand All @@ -222,8 +220,7 @@ async def process_considering_batchsize(*args):
break
texts.append(verse_text)
tasks.append(await process_considering_batchsize(session, question, hunk, book, chapter_start, verse_start, chapter_end, verse_end, yes_token_id, no_token_id))
await asyncio.sleep(0.2)
specific_scores = await tqdm.gather(*tasks, 'finding best verses', leave=False)
specific_scores = await tqdm.gather(*tasks, desc='finding best verses', leave=False)
nv = 3
top_indexes = sorted(range(len(specific_scores)), key=lambda x: specific_scores[x]['score'], reverse=True)[:nv]
print(f"Best {nv} verses from hunk in {selection['book']}:")
Expand All @@ -234,6 +231,8 @@ async def process_considering_batchsize(*args):
ref = obj['ref']
print(f' Score: {score}, Reference: {ref};')
print(' ' + ' '.join(text.split('\n')))
# kick prevention
await asyncio.sleep(0.5)


try:
Expand Down
2 changes: 1 addition & 1 deletion start_hellobible.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 16 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 32 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
1 change: 1 addition & 0 deletions start_mistralv3.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
./llama-server -ngl 99 -fa -c 131072 -np 128 -t 8 -m models/yarn-mistral-7b-128k.Q4_K_M.gguf --api-key Password12344321

0 comments on commit 477c694

Please sign in to comment.