Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
cgisky1980 committed Oct 14, 2023
2 parents f6539c3 + 5a14f41 commit b5fa0e3
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 64 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ qdrant/
.qdrant-initialized
run.bat
node_modules/
.vscode/
50 changes: 0 additions & 50 deletions .vscode/launch.json

This file was deleted.

7 changes: 0 additions & 7 deletions .vscode/settings.json

This file was deleted.

14 changes: 7 additions & 7 deletions assets/configs/Config.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[model]
path = "assets/models/RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096.st" # Path to the model.
quant = [] # Layers to be quantized.
token_chunk_size = 32 # Size of token chunk that is inferred at once. For high-end GPUs, this could be 64 or 128 (faster).
head_chunk_size = 8192 # DO NOT modify this if you don't know what you are doing.
max_runtime_batch = 8 # The maximum number of batches that can be scheduled for inference at the same time.
max_batch = 2 # The maximum number of batches cached on the GPU.
embed_layer = 2 # The (reversed) layer number whose output is used as embedding.
path = "assets/models/RWKV-4-World-0.4B-v1-20230529-ctx4096.st" # Path to the model.
quant = [] # Layers to be quantized.
token_chunk_size = 32 # Size of token chunk that is inferred at once. For high-end GPUs, this could be 64 or 128 (faster).
head_chunk_size = 8192 # DO NOT modify this if you don't know what you are doing.
max_runtime_batch = 8 # The maximum number of batches that can be scheduled for inference at the same time.
max_batch = 2 # The maximum number of batches cached on the GPU.
embed_layer = 2 # The (reversed) layer number whose output is used as embedding.

0 comments on commit b5fa0e3

Please sign in to comment.