optimize for hellobible

micsthepick · Jul 2, 2024 · 477c694 · 477c694
1 parent 75e1027
commit 477c694
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 11 deletions.
diff --git a/llamacppasync.py b/llamacppasync.py
@@ -11,12 +11,10 @@
 
 # Configuration and Constants
 HUNKSIZE = 3696
-BATCHSIZE = 128
+BATCHSIZE = 32
 # The model's context size should be related to the values above by the following equation:
 # CTXSIZE = BATCHSIZE*(HUNKSIZE/4 + 400/4), or equivalently HUNKSIZE = 4*CTXSIZE/BATCHSIZE - 400
-# BATCHSIZE = 16 and CTXSIZE = 65536 (HUNKSIZE = 15984) works best from experimentation on my 24GB 3090, (need to compress kv cache)
-# with model Phi-3-mini-128k-instruct
-# (BATCHSIZE = 16, CTXSIZE = 32768 (max), HUNKSIZE = 7792) with HelloBible
+# (BATCHSIZE = 32, CTXSIZE = 32768 (max), HUNKSIZE = 3696) with HelloBible works well on my RTX 3090 with 24GB VRAM
 
 testing_key = 'Password12344321'
 AUTH = os.getenv("OPENAI_AI_KEY", testing_key)
@@ -80,11 +78,11 @@ async def get_books(books=None, path="Bible-kjv"):
         yield book, get_chapters(book_object)
 
 def get_data(question, hunk):
-    return f"""Determine whether the Bible text is applicable for answering the provided question:
+    return f"""[INST]Determine whether the Bible text is applicable for answering the provided question:
 QUESTION: {question}
 TEXT: {hunk}
-(Your Answer Must be 'yes' or 'no' without quotes)<|end|>
-ANSWER:"""
+(Your Answer Must be 'yes' or 'no' without quotes)[/INST]
+Answer:"""
 
 def get_score(value):
     """ Convert raw score to a human-readable score. """
@@ -200,7 +198,7 @@ async def process_considering_batchsize(*args):
             tasks = []
             async for question, hunk, book, (chapter_start, verse_start), (chapter_end, verse_end) in task_gen:
                 tasks.append(await process_considering_batchsize(session, question, hunk, book, chapter_start, verse_start, chapter_end, verse_end, yes_token_id, no_token_id))
-            scores = await tqdm.gather(*tasks, 'finding best hunks', leave=False)
+            scores = await tqdm.gather(*tasks, desc='finding best hunks', leave=False)
             n = 7
             print(f'Scores accumulated. Best {n} hunks to follow')
             best = sorted(scores, key=lambda x:-x['score'])[:n]
@@ -222,8 +220,7 @@ async def process_considering_batchsize(*args):
                                 break
                             texts.append(verse_text)
                             tasks.append(await process_considering_batchsize(session, question, hunk, book, chapter_start, verse_start, chapter_end, verse_end, yes_token_id, no_token_id))
-                await asyncio.sleep(0.2)
-                specific_scores = await tqdm.gather(*tasks, 'finding best verses', leave=False)
+                specific_scores = await tqdm.gather(*tasks, desc='finding best verses', leave=False)
                 nv = 3
                 top_indexes = sorted(range(len(specific_scores)), key=lambda x: specific_scores[x]['score'], reverse=True)[:nv]
                 print(f"Best {nv} verses from hunk in {selection['book']}:")
@@ -234,6 +231,8 @@ async def process_considering_batchsize(*args):
                     ref = obj['ref']
                     print(f'  Score: {score}, Reference: {ref};')
                     print('    ' + '    '.join(text.split('\n')))
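+                # kick prevention: pause briefly before processing the next selection
+                # (presumably so the server does not drop the client -- TODO confirm)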
+                await asyncio.sleep(0.5)
 
 
 try:

diff --git a/start_hellobible.sh b/start_hellobible.sh
@@ -1 +1 @@
-./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 16 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
+./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 32 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
diff --git a/start_mistralv3.sh b/start_mistralv3.sh
@@ -0,0 +1 @@
+./llama-server -ngl 99 -fa -c 131072 -np 128 -t 8 -m models/yarn-mistral-7b-128k.Q4_K_M.gguf --api-key Password12344321
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 16 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
		./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 32 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		./llama-server -ngl 99 -fa -c 131072 -np 128 -t 8 -m models/yarn-mistral-7b-128k.Q4_K_M.gguf --api-key Password12344321