From 2436fa2d4576a6f757a2d8cb4f03884cce511d15 Mon Sep 17 00:00:00 2001 From: grencez Date: Thu, 25 Jul 2024 03:31:28 -0700 Subject: [PATCH] update(llama.cpp): with Llama v3.1 model support --- dep/cmake_fetchcontent/llama_cpp.cmake | 4 +-- example/prompt/assistant_gemma/setting.sxpb | 1 - example/prompt/assistant_llama/README.md | 4 +++ example/prompt/assistant_llama/priming.txt | 8 +++++ example/prompt/assistant_llama/setting.sxpb | 32 +++++++++++++++++++ example/prompt/assistant_mistral/setting.sxpb | 1 - 6 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 example/prompt/assistant_llama/README.md create mode 100644 example/prompt/assistant_llama/priming.txt create mode 100644 example/prompt/assistant_llama/setting.sxpb diff --git a/dep/cmake_fetchcontent/llama_cpp.cmake b/dep/cmake_fetchcontent/llama_cpp.cmake index be3f643..a5f8507 100644 --- a/dep/cmake_fetchcontent/llama_cpp.cmake +++ b/dep/cmake_fetchcontent/llama_cpp.cmake @@ -1,7 +1,7 @@ FetchContent_Declare( LlamaCpp - GIT_REPOSITORY "https://github.com/ggerganov/llama.cpp.git" - GIT_TAG "50e05353e88d50b644688caa91f5955e8bdb9eb9" + GIT_REPOSITORY "https://github.com/jmorganca/llama.cpp.git" + GIT_TAG "0d3ce0904591ed3ad24e744ca4d7cae9af7853f8" ) set(GGML_OPENMP FALSE CACHE BOOL "OpenMP off for compatibility.") diff --git a/example/prompt/assistant_gemma/setting.sxpb b/example/prompt/assistant_gemma/setting.sxpb index 2f3a684..e611db9 100644 --- a/example/prompt/assistant_gemma/setting.sxpb +++ b/example/prompt/assistant_gemma/setting.sxpb @@ -15,7 +15,6 @@ ) (x_priming "priming.txt") -(x_rolling "rolling.txt") (o_rolling "../../../bld/example/prompt/assistant_gemma.txt") ; No starting space. 
diff --git a/example/prompt/assistant_llama/README.md b/example/prompt/assistant_llama/README.md new file mode 100644 index 0000000..6341a71 --- /dev/null +++ b/example/prompt/assistant_llama/README.md @@ -0,0 +1,4 @@ +# Llama Assistant + +This example should be run with Llama-style models that are tuned to behave like an instruction-following assistant chatbot. +Most importantly, the model must have special `<|start_header_id|>` and `<|eot_id|>` tokens. diff --git a/example/prompt/assistant_llama/priming.txt b/example/prompt/assistant_llama/priming.txt new file mode 100644 index 0000000..56dde27 --- /dev/null +++ b/example/prompt/assistant_llama/priming.txt @@ -0,0 +1,8 @@ +<|start_header_id|>system<|end_header_id|> + +Cutting Knowledge Date: December 2023 + +You are a helpful assistant<|eot_id|> +<|start_header_id|>user<|end_header_id|> + +Hello!<|eot_id|> diff --git a/example/prompt/assistant_llama/setting.sxpb b/example/prompt/assistant_llama/setting.sxpb new file mode 100644 index 0000000..4417de1 --- /dev/null +++ b/example/prompt/assistant_llama/setting.sxpb @@ -0,0 +1,32 @@ +(chat_prefixes (()) + (m + (prefix "<|start_header_id|>user<|end_header_id|>\n\n") + (suffix "<|eot_id|>\n")) + (m + (prefix "<|start_header_id|>assistant<|end_header_id|>\n\n") + (suffix "<|eot_id|>\n") + ) +) +(substitution + (special_tokens (()) + (() (name "<|begin_of_text|>")) + (() (name "<|start_header_id|>")) + (() (name "<|end_header_id|>")) + (() (name "<|eot_id|>")) + ) +) + +(x_priming "priming.txt") +(o_rolling "../../../bld/example/prompt/assistant_llama.txt") + 
; No starting space. +(startspace_on +false) +; No token penalization. +(repeat_window 0) + +; 10 reasonably-long sentences at a time. +(sentence_limit 10) +(sentence_token_limit 100) + +; Limit context to avoid blowing up RAM on large context models. 
+(model_token_limit 8000) diff --git a/example/prompt/assistant_mistral/setting.sxpb b/example/prompt/assistant_mistral/setting.sxpb index 4826ca4..1316beb 100644 --- a/example/prompt/assistant_mistral/setting.sxpb +++ b/example/prompt/assistant_mistral/setting.sxpb @@ -17,7 +17,6 @@ ) (x_priming "priming.txt") -(x_rolling "rolling.txt") (o_rolling "../../../bld/example/prompt/assistant_mistral.txt") ; No starting space.