update(llama.cpp): with Llama v3.1 model support

rendezqueue · Jul 25, 2024 · 2436fa2 · 2436fa2
1 parent 54030b1
commit 2436fa2
Show file tree

Hide file tree

Showing 6 changed files with 46 additions and 4 deletions.
diff --git a/dep/cmake_fetchcontent/llama_cpp.cmake b/dep/cmake_fetchcontent/llama_cpp.cmake
@@ -1,7 +1,7 @@
 FetchContent_Declare(
   LlamaCpp
-  GIT_REPOSITORY "https://github.com/ggerganov/llama.cpp.git"
-  GIT_TAG "50e05353e88d50b644688caa91f5955e8bdb9eb9"
+  GIT_REPOSITORY "https://github.com/jmorganca/llama.cpp.git"
+  GIT_TAG "0d3ce0904591ed3ad24e744ca4d7cae9af7853f8"
 )
 
 set(GGML_OPENMP FALSE CACHE BOOL "OpenMP off for compatibility.")

diff --git a/example/prompt/assistant_gemma/setting.sxpb b/example/prompt/assistant_gemma/setting.sxpb
@@ -15,7 +15,6 @@
 )
 
 (x_priming "priming.txt")
-(x_rolling "rolling.txt")
 (o_rolling "../../../bld/example/prompt/assistant_gemma.txt")
 
 ; No starting space.

diff --git a/example/prompt/assistant_llama/README.md b/example/prompt/assistant_llama/README.md
@@ -0,0 +1,4 @@
+# Llama Assistant
+
+This example should be run with Llama 3-style models that are tuned to behave like an instruction-following assistant chatbot.
+Most importantly, the model must have special `<|start_header_id|>`, `<|end_header_id|>`, and `<|eot_id|>` tokens.
diff --git a/example/prompt/assistant_llama/priming.txt b/example/prompt/assistant_llama/priming.txt
@@ -0,0 +1,8 @@
+<|start_header_id|>system<|end_header_id|>
+
+Cutting Knowledge Date: December 2023
+
+You are a helpful assistant<|eot_id|>
+<|start_header_id|>user<|end_header_id|>
+
+Hello!<|eot_id|>
diff --git a/example/prompt/assistant_llama/setting.sxpb b/example/prompt/assistant_llama/setting.sxpb
@@ -0,0 +1,32 @@
+(chat_prefixes (())
+ (m
+  (prefix "<|start_header_id|>user<|end_header_id|>\n\n")
+  (suffix "<|eot_id|>\n")
+ )
+ (m
+  (prefix "<|start_header_id|>assistant<|end_header_id|>\n\n")
+  (suffix "<|eot_id|>\n")
+ )
+)
+(substitution
+ (special_tokens (())
+  (() (name "<|start_header_id|>"))
+  (() (name "<|end_header_id|>"))
+  (() (name "<|eot_id|>"))
+ )
+)
+
+(x_priming "priming.txt")
+(o_rolling "../../../bld/example/prompt/assistant_llama.txt")
+
+; No starting space.
+(startspace_on +false)
+; No token penalization.
+(repeat_window 0)
+
+; 10 reasonably-long sentences at a time.
+(sentence_limit 10)
+(sentence_token_limit 100)
+
+; Limit context to avoid blowing up RAM on large context models.
+(model_token_limit 8000)
diff --git a/example/prompt/assistant_mistral/setting.sxpb b/example/prompt/assistant_mistral/setting.sxpb
@@ -17,7 +17,6 @@
 )
 
 (x_priming "priming.txt")
-(x_rolling "rolling.txt")
 (o_rolling "../../../bld/example/prompt/assistant_mistral.txt")
 
 ; No starting space.