Skip to content

Commit

Permalink
batch tuning
Browse files Browse the repository at this point in the history
  • Loading branch information
micsthepick committed Jul 9, 2024
1 parent 01510c9 commit 9d6dd15
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
14 changes: 7 additions & 7 deletions discordbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@


# Configuration and Constants
HUNKSIZE = 1648
BATCHSIZE = 64
HUNKSIZE = 15984
BATCHSIZE = 8
# the model context size (CTXSIZE) in use should be related to
# the above by the following equation:
# interactionSIZE = BATCHSIZE*(HUNKSIZE/4+400/4),
# or alternatively HUNKSIZE = 4*interactionSIZE/BATCHSIZE-400
# (BATCHSIZE = 32, interactionSIZE = 32768 (max), HUNKSIZE = 3696
# with HelloBible works well on my RTX 3090 with 24GB VRAM)
# CTXSIZE = BATCHSIZE*(HUNKSIZE/4+400/4),
# or alternatively HUNKSIZE = 4*CTXSIZE/BATCHSIZE-400
# (BATCHSIZE = 8, CTXSIZE = 32768 (max), HUNKSIZE = 15984
# with HelloBiblev0.2 works well on my RTX 3090 with 24GB VRAM)

testing_key = 'Password12344321'
AUTH = os.getenv("OPENAI_AI_KEY", testing_key)
Expand Down Expand Up @@ -542,7 +542,7 @@ async def do_search(interaction: discord.Interaction, generate_cb, book_sep, use
global yes_token_id, no_token_id
send_cb = interaction.edit_original_response
try:
print(f'{user_name} requested: ' + query)
print(f'{user_name} requested: {query} in {details[0]["title"]}')
await send_cb(content=f"Looking through {details[0]['title'] if details else 'everywhere'}. This may take a while!")
# only keep BATCHSIZE concurrent requests!
pbar = tqdm(total=BATCHSIZE, desc="queue progress", leave=False)
Expand Down
12 changes: 6 additions & 6 deletions llamacppasync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@


# Configuration and Constants
HUNKSIZE = 3696
BATCHSIZE = 32
HUNKSIZE = 15984
BATCHSIZE = 8
# the model context size (CTXSIZE) in use should be related to
# the above by the following equation:
# interactionSIZE = BATCHSIZE*(HUNKSIZE/4+400/4),
# or alternatively HUNKSIZE = 4*interactionSIZE/BATCHSIZE-400
# (BATCHSIZE = 32, interactionSIZE = 32768 (max), HUNKSIZE = 3696
# with HelloBible works well on my RTX 3090 with 24GB VRAM)
# CTXSIZE = BATCHSIZE*(HUNKSIZE/4+400/4),
# or alternatively HUNKSIZE = 4*CTXSIZE/BATCHSIZE-400
# (BATCHSIZE = 8, CTXSIZE = 32768 (max), HUNKSIZE = 15984
# with HelloBiblev0.2 works well on my RTX 3090 with 24GB VRAM)

testing_key = 'Password12344321'
AUTH = os.getenv("OPENAI_AI_KEY", testing_key)
Expand Down
2 changes: 1 addition & 1 deletion start_hellobible.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 32 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321
./llama-server --n-gpu-layers 99 -cb -tb 8 --ctx-size 32768 -np 8 -m ../text-generation-webui/models/Hello-Bible_Gabriel-7b-Instruct-v0.2/ggml-model-f16.gguf --api-key Password12344321

0 comments on commit 9d6dd15

Please sign in to comment.