From 2436fa2d4576a6f757a2d8cb4f03884cce511d15 Mon Sep 17 00:00:00 2001 From: grencez Date: Thu, 25 Jul 2024 03:31:28 -0700 Subject: [PATCH] update(llama.cpp): with Llama v3.1 model support --- dep/cmake_fetchcontent/llama_cpp.cmake | 4 +-- example/prompt/assistant_gemma/setting.sxpb | 1 - example/prompt/assistant_llama/README.md | 4 +++ example/prompt/assistant_llama/priming.txt | 8 +++++ example/prompt/assistant_llama/setting.sxpb | 32 +++++++++++++++++++ example/prompt/assistant_mistral/setting.sxpb | 1 - 6 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 example/prompt/assistant_llama/README.md create mode 100644 example/prompt/assistant_llama/priming.txt create mode 100644 example/prompt/assistant_llama/setting.sxpb diff --git a/dep/cmake_fetchcontent/llama_cpp.cmake b/dep/cmake_fetchcontent/llama_cpp.cmake index be3f643..a5f8507 100644 --- a/dep/cmake_fetchcontent/llama_cpp.cmake +++ b/dep/cmake_fetchcontent/llama_cpp.cmake @@ -1,7 +1,7 @@ FetchContent_Declare( LlamaCpp - GIT_REPOSITORY "https://github.com/ggerganov/llama.cpp.git" - GIT_TAG "50e05353e88d50b644688caa91f5955e8bdb9eb9" + GIT_REPOSITORY "https://github.com/jmorganca/llama.cpp.git" + GIT_TAG "0d3ce0904591ed3ad24e744ca4d7cae9af7853f8" ) set(GGML_OPENMP FALSE CACHE BOOL "OpenMP off for compatibility.") diff --git a/example/prompt/assistant_gemma/setting.sxpb b/example/prompt/assistant_gemma/setting.sxpb index 2f3a684..e611db9 100644 --- a/example/prompt/assistant_gemma/setting.sxpb +++ b/example/prompt/assistant_gemma/setting.sxpb @@ -15,7 +15,6 @@ ) (x_priming "priming.txt") -(x_rolling "rolling.txt") (o_rolling "../../../bld/example/prompt/assistant_gemma.txt") ; No starting space. 
diff --git a/example/prompt/assistant_llama/README.md b/example/prompt/assistant_llama/README.md new file mode 100644 index 0000000..6341a71 --- /dev/null +++ b/example/prompt/assistant_llama/README.md @@ -0,0 +1,4 @@ +# Llama Assistant + +This example should be run with Llama-style models that are tuned to behave like an instruction-following assistant chatbot. +Most importantly, the model must have special `<|start_header_id|>` and `<|eot_id|>` tokens. diff --git a/example/prompt/assistant_llama/priming.txt b/example/prompt/assistant_llama/priming.txt new file mode 100644 index 0000000..56dde27 --- /dev/null +++ b/example/prompt/assistant_llama/priming.txt @@ -0,0 +1,8 @@ +<|start_header_id|>system<|end_header_id|> + +Cutting Knowledge Date: December 2023 + +You are a helpful assistant<|eot_id|> +<|start_header_id|>user<|end_header_id|> + +Hello!<|eot_id|> diff --git a/example/prompt/assistant_llama/setting.sxpb b/example/prompt/assistant_llama/setting.sxpb new file mode 100644 index 0000000..4417de1 --- /dev/null +++ b/example/prompt/assistant_llama/setting.sxpb @@ -0,0 +1,32 @@ +(chat_prefixes (()) + (m + (prefix "<|start_header_id|>user<|end_header_id|>\n\n") + (suffix "<|eot_id|>\n")) + (m + (prefix "<|start_header_id|>assistant<|end_header_id|>\n\n") + (suffix "<|eot_id|>\n") + ) +) +(substitution + (special_tokens (()) + (() (name "<|begin_of_text|>")) + (() (name "<|start_header_id|>")) + (() (name "<|end_header_id|>")) + (() (name "<|eot_id|>")) + ) +) + +(x_priming "priming.txt") +(o_rolling "../../../bld/example/prompt/assistant_llama.txt") + 
; No starting space. +(startspace_on +false) +; No token penalization. +(repeat_window 0) + +; 10 reasonably-long sentences at a time. +(sentence_limit 10) +(sentence_token_limit 100) + +; Limit context to avoid blowing up RAM on large context models. 
+(model_token_limit 8000) diff --git a/example/prompt/assistant_mistral/setting.sxpb b/example/prompt/assistant_mistral/setting.sxpb index 4826ca4..1316beb 100644 --- a/example/prompt/assistant_mistral/setting.sxpb +++ b/example/prompt/assistant_mistral/setting.sxpb @@ -17,7 +17,6 @@ ) (x_priming "priming.txt") -(x_rolling "rolling.txt") (o_rolling "../../../bld/example/prompt/assistant_mistral.txt") ; No starting space.