batch tuning

micsthepick · Jul 9, 2024 · 9d6dd15 · 9d6dd15
1 parent 01510c9
commit 9d6dd15
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 14 deletions.
diff --git a/discordbot.py b/discordbot.py
@@ -18,14 +18,14 @@
 
 
 # Configuration and Constants
-HUNKSIZE = 1648
-BATCHSIZE = 64
+HUNKSIZE = 15984
+BATCHSIZE = 8
 # the model context size (CTXSIZE) should be related to
 # the above with the following eqn:
-# interactionSIZE = BATCHSIZE*(HUNKSIZE/4+400/4),
-#   or alternatively HUNKSIZE = 4*interactionSIZE/BATCHSIZE-400
-# (BATCHSIZE = 32, interactionSIZE = 32768 (max), HUNKSIZE = 3696
-#   with HelloBible works well on my RTX 3090 with 24GB VRAM)
+# CTXSIZE = BATCHSIZE*(HUNKSIZE/4+400/4),
+#   or alternatively HUNKSIZE = 4*CTXSIZE/BATCHSIZE-400
+# (BATCHSIZE = 8, CTXSIZE = 32768 (max), HUNKSIZE = 15984
+#   with HelloBiblev0.2 works well on my RTX 3090 with 24GB VRAM)
 
 testing_key = 'Password12344321'
 AUTH = os.getenv("OPENAI_AI_KEY", testing_key)
@@ -542,7 +542,7 @@ async def do_search(interaction: discord.Interaction, generate_cb, book_sep, use
     global yes_token_id, no_token_id
     send_cb = interaction.edit_original_response
     try:
-        print(f'{user_name} requested: ' + query)
+        print(f'{user_name} requested: {query} in {details[0]["title"]}')
         await send_cb(content=f"Looking through {details[0]['title'] if details else 'everywhere'}. This may take a while!")
         # only keep BATCHSIZE concurrent requests!
         pbar = tqdm(total=BATCHSIZE, desc="queue progress", leave=False)

diff --git a/llamacppasync.py b/llamacppasync.py
@@ -11,14 +11,14 @@
 
 
 # Configuration and Constants
-HUNKSIZE = 3696
-BATCHSIZE = 32
+HUNKSIZE = 15984
+BATCHSIZE = 8
 # the model context size (CTXSIZE) should be related to
 # the above with the following eqn:
-# interactionSIZE = BATCHSIZE*(HUNKSIZE/4+400/4),
-#   or alternatively HUNKSIZE = 4*interactionSIZE/BATCHSIZE-400
-# (BATCHSIZE = 32, interactionSIZE = 32768 (max), HUNKSIZE = 3696
-#   with HelloBible works well on my RTX 3090 with 24GB VRAM)
+# CTXSIZE = BATCHSIZE*(HUNKSIZE/4+400/4),
+#   or alternatively HUNKSIZE = 4*CTXSIZE/BATCHSIZE-400
+# (BATCHSIZE = 8, CTXSIZE = 32768 (max), HUNKSIZE = 15984
+#   with HelloBiblev0.2 works well on my RTX 3090 with 24GB VRAM)
 
 testing_key = 'Password12344321'
 AUTH = os.getenv("OPENAI_AI_KEY", testing_key)

diff --git a/start_hellobible.sh b/start_hellobible.sh
@@ -1 +1 @@
-./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 32 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
+./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 8 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 32 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
		+./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 8 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321