diff --git a/CMakeLists.txt b/CMakeLists.txt index 122e359..3342ee4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_STANDARD 17) +option(BUILD_SHARED_LIBS "Build using shared libraries" OFF) option(LLAMA_OPENBLAS_ON "llama: use OpenBLAS" OFF) diff --git a/dep/cmake_fetchcontent/fildesh.cmake b/dep/cmake_fetchcontent/fildesh.cmake index 5b645c0..ba53f8c 100644 --- a/dep/cmake_fetchcontent/fildesh.cmake +++ b/dep/cmake_fetchcontent/fildesh.cmake @@ -1,7 +1,7 @@ FetchContent_Declare( Fildesh GIT_REPOSITORY "https://github.com/fildesh/fildesh.git" - GIT_TAG "9defdbec27700e01888d0834015e28947baba7cd" + GIT_TAG "e9421ebbb546cf38bf2051d02fdc38dcf71f8216" ) FetchContent_MakeAvailable(Fildesh) set(Fildesh_INCLUDE_DIRS ${Fildesh_INCLUDE_DIRS} PARENT_SCOPE) diff --git a/dep/cmake_fetchcontent/llama_cpp.cmake b/dep/cmake_fetchcontent/llama_cpp.cmake index 779897b..be3f643 100644 --- a/dep/cmake_fetchcontent/llama_cpp.cmake +++ b/dep/cmake_fetchcontent/llama_cpp.cmake @@ -1,11 +1,14 @@ FetchContent_Declare( LlamaCpp GIT_REPOSITORY "https://github.com/ggerganov/llama.cpp.git" - GIT_TAG "621e86b331f8b0e71f79fd82a4ae1cd54c3e4396" + GIT_TAG "50e05353e88d50b644688caa91f5955e8bdb9eb9" ) + +set(GGML_OPENMP FALSE CACHE BOOL "OpenMP off for compatibility.") FetchContent_MakeAvailable(LlamaCpp) + set(LlamaCpp_SOURCE_DIR "${llamacpp_SOURCE_DIR}" PARENT_SCOPE) -set(LlamaCpp_INCLUDE_DIRS "${llamacpp_SOURCE_DIR}" PARENT_SCOPE) +set(LlamaCpp_INCLUDE_DIRS "${llamacpp_SOURCE_DIR}/include" PARENT_SCOPE) set(LlamaCpp_LIBRARIES "$" PARENT_SCOPE) if (LLAMA_OPENBLAS_ON) diff --git a/doc/setting/model.md b/doc/setting/model.md index 1ce5a30..2f82cdf 100644 --- a/doc/setting/model.md +++ b/doc/setting/model.md @@ -5,14 +5,11 @@ I prefer using flags to specify model files. - `--model ggml-model-q4_0.gguf` are the model weights. Usually quantized. - Required. - `--lora ggml-adapter-model.gguf` gives a LoRA. -- `--lora_base ggml-model-f16.gguf` gives higher-precision model weights to apply the LoRA on top of. - - Not required when using `--lora` but you'll otherwise get a warning if the `--model` weights are low-precision. Even though the flags are preferred, `setting.sxpb` supports them too: ```lisp (model "ggml-model-q4_0.gguf") (lora "ggml-adapter-model.gguf") -(lora_base "ggml-model-f16.gguf") ``` ## Context diff --git a/example/prompt/README.md b/example/prompt/README.md index af03215..cb43a64 100644 --- a/example/prompt/README.md +++ b/example/prompt/README.md @@ -7,12 +7,11 @@ In order of interest: - Demonstrates why LLMs are hard to get right. - [confidant_alpaca](confidant_alpaca/): A camelid that occasionally spits. - Demonstrates a method of prompting instruction-tuned models to fill in character dialogue. -- [assistant_alpaca](assistant_alpaca/): Instruction-following AI assistant. - - Minimial prompt that lets an Alpaca-style model do its thing. - - Only works with models that end the assistant's message with an EOS token. -- [assistant_chatml](assistant_chatml/): Instruction-following AI assistant. - - Minimial prompt that lets an ChatML-style model do its thing. - - Only works with models that have special `<|im_start|>` and `<|im_end|>` tokens. +- Instruction-following AI assistants. + - [assistant_alpaca](assistant_alpaca/): Alpaca prompt format. + - [assistant_chatml](assistant_chatml/): ChatML prompt format that requires special `<|im_start|>` and `<|im_end|>` tokens. 
+ - [assistant_gemma](assistant_gemma/): Gemma prompt format that requires special `<start_of_turn>` and `<end_of_turn>` tokens. + - [assistant_mistral](assistant_mistral/): Mistral prompt format that requires special `[INST]` and `[/INST]` tokens. - [assistant_vicuna](assistant_vicuna/): Conversational AI assistant. - Minimial prompt that lets a Vicuna-style model do its thing. - Only works with models that end the assistant's message with an EOS token. diff --git a/example/prompt/assistant_alpaca/setting.sxpb b/example/prompt/assistant_alpaca/setting.sxpb index c071e0b..05faafd 100644 --- a/example/prompt/assistant_alpaca/setting.sxpb +++ b/example/prompt/assistant_alpaca/setting.sxpb @@ -1,18 +1,18 @@ -(((chat_prefixes)) +((chat_prefixes) (m - (prefix "### Instruction:\n") - (suffix "\n\n")) + (prefix "### Instruction:\n") + (suffix "\n\n")) (m - (prefix "### Response:\n") - ; Model must be fine-tuned to end the response with EOS token. - (suffix "</s>\n\n") + (prefix "### Response:\n") + ; Model must be fine-tuned to end the response with EOS token. + (suffix "</s>\n\n") ) ) (substitution - (eos_token_alias "</s>") + (eos_token_alias "</s>") ) ; Lines are considered as sentences. -((sentence_terminals) "\n") +(sentence_terminals () "\n") ; Max 10 lines at a time. (sentence_limit 10) (sentence_token_limit 1000) diff --git a/example/prompt/assistant_chatml/setting.sxpb b/example/prompt/assistant_chatml/setting.sxpb index 7b6003b..1d36392 100644 --- a/example/prompt/assistant_chatml/setting.sxpb +++ b/example/prompt/assistant_chatml/setting.sxpb @@ -1,20 +1,20 @@ -(((chat_prefixes)) +((chat_prefixes) (m - (prefix "<|im_start|>user\n") - (suffix "<|im_end|>\n")) + (prefix "<|im_start|>user\n") + (suffix "<|im_end|>\n")) (m - (prefix "<|im_start|>assistant\n") - (suffix "<|im_end|>\n") + (prefix "<|im_start|>assistant\n") + (suffix "<|im_end|>\n") ) ) (substitution - ; Uncomment the next 2 lines if your model doesn't support ChatML tokens. - ;(bos_token_alias "<|im_start|>") - ;(eos_token_alias "<|im_end|>") - ((special_tokens) - (() (name "<|im_start|>")) - (() (name "<|im_end|>")) - ) + ; Uncomment the next 2 lines if your model doesn't support ChatML tokens. + ;(bos_token_alias "<|im_start|>") + ;(eos_token_alias "<|im_end|>") + (special_tokens (()) + (() (name "<|im_start|>")) + (() (name "<|im_end|>")) + ) ) (x_priming "priming.txt") diff --git a/example/prompt/assistant_coprocess/setting.sxpb b/example/prompt/assistant_coprocess/setting.sxpb index 79ebc26..210b8f1 100644 --- a/example/prompt/assistant_coprocess/setting.sxpb +++ b/example/prompt/assistant_coprocess/setting.sxpb @@ -1,7 +1,7 @@ ; Bot name is replaced in the prompt. (confidant "Banterbot") (substitution - (confidant_alias "{{char}}") + (confidant_alias "{{char}}") ) (thread_count 2) diff --git a/example/prompt/assistant_gemma/README.md b/example/prompt/assistant_gemma/README.md new file mode 100644 index 0000000..6341a71 --- /dev/null +++ b/example/prompt/assistant_gemma/README.md @@ -0,0 +1,4 @@ +# Gemma Assistant + +This example should be run with Gemma-style models that are tuned to behave like an instruction-following assistant chatbot. +Most importantly, the model must have special `<start_of_turn>` and `<end_of_turn>` tokens. diff --git a/example/prompt/assistant_gemma/priming.txt b/example/prompt/assistant_gemma/priming.txt new file mode 100644 index 0000000..c36db44 --- /dev/null +++ b/example/prompt/assistant_gemma/priming.txt @@ -0,0 +1,2 @@ +<start_of_turn>user +Hello!<end_of_turn>
diff --git a/example/prompt/assistant_gemma/setting.sxpb b/example/prompt/assistant_gemma/setting.sxpb new file mode 100644 index 0000000..2f3a684 --- /dev/null +++ b/example/prompt/assistant_gemma/setting.sxpb @@ -0,0 +1,31 @@ +(chat_prefixes (()) + (m + (prefix "<start_of_turn>user\n") + (suffix "<end_of_turn>\n")) + (m + (prefix "<start_of_turn>assistant\n") + (suffix "<end_of_turn>\n") + ) +) +(substitution + (special_tokens (()) + (() (name "<start_of_turn>")) + (() (name "<end_of_turn>")) + ) +) + +(x_priming "priming.txt") +(x_rolling "rolling.txt") +(o_rolling "../../../bld/example/prompt/assistant_gemma.txt") + +; No starting space. +(startspace_on +false) +; No token penalization. +(repeat_window 0) + +; 10 reasonably-long sentences at a time. +(sentence_limit 10) +(sentence_token_limit 100) + +; Limit context to avoid blowing up RAM on large context models. +(model_token_limit 8000) diff --git a/example/prompt/assistant_mistral/README.md b/example/prompt/assistant_mistral/README.md new file mode 100644 index 0000000..5fa4a4f --- /dev/null +++ b/example/prompt/assistant_mistral/README.md @@ -0,0 +1,4 @@ +# Mistral Assistant + +This example should be run with Mistral-style models that are tuned to behave like an instruction-following assistant chatbot. +Most importantly, the model must have special `[INST]` and `[/INST]` tokens. diff --git a/example/prompt/assistant_mistral/priming.txt b/example/prompt/assistant_mistral/priming.txt new file mode 100644 index 0000000..fbd0fe1 --- /dev/null +++ b/example/prompt/assistant_mistral/priming.txt @@ -0,0 +1 @@ +[INST]Hello![/INST] diff --git a/example/prompt/assistant_mistral/setting.sxpb b/example/prompt/assistant_mistral/setting.sxpb new file mode 100644 index 0000000..4826ca4 --- /dev/null +++ b/example/prompt/assistant_mistral/setting.sxpb @@ -0,0 +1,38 @@ +(chat_prefixes (()) + (m + (prefix "[INST]") + (suffix "[/INST]\n") + ) + (m + (prefix "") + (suffix "</s>\n") + ) +) +(substitution + (eos_token_alias "</s>") + (special_tokens (()) + (() (name "[INST]")) + (() (name "[/INST]")) + ) +) + +(x_priming "priming.txt") +(x_rolling "rolling.txt") +(o_rolling "../../../bld/example/prompt/assistant_mistral.txt") + +; No starting space. +(startspace_on +false) + +; 10 reasonably-long sentences at a time. +(sentence_limit 10) +(sentence_token_limit 100) + +; Limit context to avoid blowing up RAM on large context models. +(model_token_limit 8000) + +; Match recommendation for Mistral NeMo v1.
+; https://build.nvidia.com/nv-mistralai/mistral-nemo-12b-instruct +(mirostat 0) +(repeat_window 0) +(temperature 0.2) +(top_p 0.7) diff --git a/example/prompt/assistant_plain/setting.sxpb b/example/prompt/assistant_plain/setting.sxpb index d751a84..75dbf8d 100644 --- a/example/prompt/assistant_plain/setting.sxpb +++ b/example/prompt/assistant_plain/setting.sxpb @@ -1,8 +1,8 @@ (protagonist "User") (confidant "Assistant") (substitution - (protagonist_alias "{{user}}") - (confidant_alias "{{char}}") + (protagonist_alias "{{user}}") + (confidant_alias "{{char}}") ) (x_priming "../assistant_vicuna/priming.txt") (x_rolling "../assistant_vicuna/rolling.txt") diff --git a/example/prompt/assistant_vicuna/setting.sxpb b/example/prompt/assistant_vicuna/setting.sxpb index 6dc3c85..ec6fb5e 100644 --- a/example/prompt/assistant_vicuna/setting.sxpb +++ b/example/prompt/assistant_vicuna/setting.sxpb @@ -1,20 +1,20 @@ (protagonist "USER") (confidant "ASSISTANT") -(((chat_prefixes)) +((chat_prefixes) (m - (prefix "{{user}}:") - (suffix "") + (prefix "{{user}}: ") + (suffix "") ) (m - (prefix " {{char}}:") - ; Model must be fine-tuned to end the response with EOS token. - (suffix "</s>") + (prefix " {{char}}:") + ; Model must be fine-tuned to end the response with EOS token. + (suffix "</s>") ) ) (substitution - (eos_token_alias "</s>") - (protagonist_alias "{{user}}") - (confidant_alias "{{char}}") + (eos_token_alias "</s>") + (protagonist_alias "{{user}}") + (confidant_alias "{{char}}") ) (x_priming "priming.txt") (x_rolling "rolling.txt") diff --git a/example/prompt/roshambo_kira/setting.sxpb b/example/prompt/roshambo_kira/setting.sxpb index 0a478e6..ed1be2d 100644 --- a/example/prompt/roshambo_kira/setting.sxpb +++ b/example/prompt/roshambo_kira/setting.sxpb @@ -1,8 +1,8 @@ (protagonist "L") (confidant "Kira") (substitution - (protagonist_alias "{{user}}") - (confidant_alias "{{char}}") + (protagonist_alias "{{user}}") + (confidant_alias "{{char}}") ) (x_priming "priming.txt") (x_rolling "rolling.txt") diff --git a/src/chat/chat_main.cc b/src/chat/chat_main.cc index 5d15da3..b9338af 100644 --- a/src/chat/chat_main.cc +++ b/src/chat/chat_main.cc @@ -92,16 +92,18 @@ int main(int argc, char** argv) } if (exstatus == 0 && !opt.lora_filename.empty()) { - const char* base_model_filename = NULL; - if (!opt.lora_base_model_filename.empty()) { - base_model_filename = opt.lora_base_model_filename.c_str(); - } const float scale = 1.0f; - int istat = llama_model_apply_lora_from_file( - model, opt.lora_filename.c_str(), scale, - base_model_filename, - opt.thread_count); - if (istat != 0) {exstatus = 1;} + struct llama_lora_adapter* lora = llama_lora_adapter_init( + model, opt.lora_filename.c_str()); + if (lora) { + int istat = llama_lora_adapter_set(ctx, lora, scale); + if (istat != 0) { + exstatus = 1; + llama_lora_adapter_free(lora); + } + } else { + exstatus = 1; + } } Vocabulary vocabulary(model); diff --git a/src/chat/opt.cc b/src/chat/opt.cc index 441be41..160282c 100644 --- a/src/chat/opt.cc +++ b/src/chat/opt.cc @@ -148,7 +148,7 @@ static void reinitialize_chat_prefixes(ChatOptions& opt) { opt.message_opts[0].prefix += ' '; opt.message_opts[1].prefix += ' '; } - opt.message_opts[0].prefix += opt.protagonist + ':'; + opt.message_opts[0].prefix += opt.protagonist + ": "; opt.message_opts[1].prefix += opt.confidant + ':'; } for (auto& message_opt : opt.message_opts) { @@ -238,7 +238,7 @@ rendezllama::parse_options(rendezllama::ChatOptions& opt, int argc, char** argv) opt.seed = INT_MAX & time(NULL); opt.antiprompts = opt.sentence_terminals; -
opt.antiprompts.push_back("\n"); + opt.antiprompts.insert("\n"); for (argi = 1; exstatus == 0 && argi < argc; ++argi) { if (false) { @@ -261,13 +261,6 @@ rendezllama::parse_options(rendezllama::ChatOptions& opt, int argc, char** argv) else if (0 == strcmp("--lora", argv[argi])) { argi += 1; opt.lora_filename = argv[argi]; - opt.mmap_on = false; // mmap() is incompatible. - } - else if (0 == strcmp("--lora_base_model", argv[argi]) || - 0 == strcmp("--lora_base", argv[argi]) || - 0 == strcmp("--lora-base", argv[argi])) { - argi += 1; - opt.lora_base_model_filename = argv[argi]; } else if (0 == strcmp("--x_setting", argv[argi])) { argi += 1; @@ -489,9 +482,6 @@ slurp_sxpb_options_close_FildeshX( opt.mmap_on = false; // mmap() is incompatible. } - lone_subfield_at_FildeshSxpb_to_cc_string( - &opt.lora_base_model_filename, sxpb, top_it, "lora_base_model"); - if (lone_subfield_at_FildeshSxpb_to_str(&s, sxpb, top_it, "x_rolling")) { FildeshX* rolling_in = open_sibling_FildeshXF(sxpb_filename.c_str(), s); parse_rolling_prompt(rolling_in, opt); @@ -590,13 +580,13 @@ slurp_sxpb_options_close_FildeshX( for (it = first_at_FildeshSxpb(sxpb, it); !nullish_FildeshSxpbIT(it); it = next_at_FildeshSxpb(sxpb, it)) { s = str_value_at_FildeshSxpb(sxpb, it); - opt.sentence_terminals.push_back(s); + opt.sentence_terminals.insert(s); if (s[0] == '\n' && s[1] == '\0') {found = true;} } opt.antiprompts = opt.sentence_terminals; if (!found) { - opt.antiprompts.push_back("\n"); + opt.antiprompts.insert("\n"); } } diff --git a/src/chat/opt.hh b/src/chat/opt.hh index 01e275c..ba56528 100644 --- a/src/chat/opt.hh +++ b/src/chat/opt.hh @@ -3,6 +3,7 @@ #include #include +#include <set> #include struct FildeshX; @@ -29,7 +30,6 @@ struct ChatOptions { std::vector message_opts; std::string model_filename; std::string lora_filename; - std::string lora_base_model_filename; std::string transcript_sibling_filename; std::string transcript_filename; @@ -69,8 +69,8 @@ struct ChatOptions { bool mlock_on = false; bool mmap_on = true; bool coprocess_mode_on = false; - std::vector<std::string> sentence_terminals = {"!", ".", "?", "…"}; - std::vector<std::string> antiprompts; + std::set<std::string> sentence_terminals = {"!", ".", "?", "…"}; + std::set<std::string> antiprompts; // Can't set these yet.
bool verbose_prompt = false; }; diff --git a/src/chat/opt_schema.cc b/src/chat/opt_schema.cc index 2e3eda7..20ca537 100644 --- a/src/chat/opt_schema.cc +++ b/src/chat/opt_schema.cc @@ -34,8 +34,6 @@ rendezllama::options_sxproto_schema() {"coprocess_mode_on", FILL_DEFAULT_FildeshSxprotoField_BOOL}, {"frequency_penalty", FILL_DEFAULT_FildeshSxprotoField_FLOAT}, {"linespace_on", FILL_DEFAULT_FildeshSxprotoField_BOOL}, - {"lora_base_model", FILL_FildeshSxprotoField_STRING(1, FILENAME_MAX)}, - {"lora_base", FILL_DEFAULT_FildeshSxprotoField_ALIAS}, {"lora", FILL_FildeshSxprotoField_STRING(1, FILENAME_MAX)}, {"min_p", FILL_DEFAULT_FildeshSxprotoField_FLOAT}, {"mirostat", FILL_FildeshSxprotoField_INT(0, 2)}, diff --git a/src/chat/trajectory.cc b/src/chat/trajectory.cc index dc70251..c3dd183 100644 --- a/src/chat/trajectory.cc +++ b/src/chat/trajectory.cc @@ -45,13 +45,32 @@ ChatTrajectory::insert_all_at( } } +static + void +maybe_pop_for_rewrite( + ChatTrajectory& trajectory, + fildesh::ostringstream& oss, + const Vocabulary& vocabulary) +{ + if (trajectory.priming_token_count() < trajectory.token_count()) { + if (vocabulary.last_char_of(trajectory.token()) == ' ') { + vocabulary.detokenize_to(oss, trajectory.token()); + trajectory.erase_all_at(trajectory.token_count() - 1); + } + } +} + void ChatTrajectory::tokenize_append( std::string_view s, const Vocabulary& vocabulary) { + fildesh::ostringstream oss; + maybe_pop_for_rewrite(*this, oss, vocabulary); + oss << s; + std::vector tmp; - vocabulary.tokenize_to(tmp, s); + vocabulary.tokenize_to(tmp, oss.view()); this->insert_all_at(this->token_count(), tmp); } diff --git a/src/language/inference.cc b/src/language/inference.cc index c3fc789..9f5605b 100644 --- a/src/language/inference.cc +++ b/src/language/inference.cc @@ -23,7 +23,7 @@ using rendezllama::Vocabulary; const std::string& rendezllama::antiprompt_suffix( std::string_view text, - const std::vector<std::string>& antiprompts) + const std::set<std::string>& antiprompts) { static const std::string empty_string; for (const std::string& s : antiprompts) { @@ -62,7 +62,7 @@ rendezllama::augment_tokenize_chat_input( chat_guide.begin_turn(opt.message_opts.size()-1); s.erase(0, 2); prevent_subsequent_newline = maybe_trim_endspace(s); - if (opt.message_opts.back().prefix.back() != '\n' || opt.linespace_on) { + if (opt.message_opts.back().prefix.back() == '\n' && opt.linespace_on) { if (!s.empty() && s.front() != ' ') { s.insert(0, " "); } @@ -79,7 +79,7 @@ rendezllama::augment_tokenize_chat_input( } else { chat_guide.yield_turn(0); - if (opt.message_opts[0].prefix.back() != '\n' || opt.linespace_on) { + if (opt.message_opts[0].prefix.back() == '\n' && opt.linespace_on) { if (!s.empty() && s.front() != ' ') { s.insert(0, " "); } @@ -96,7 +96,8 @@ rendezllama::augment_tokenize_chat_input( rendezllama::make_llama_context(rendezllama::ChatOptions& opt) { llama_model_params model_params = llama_model_default_params(); - model_params.vocab_only = true; + model_params.use_mlock = opt.mlock_on; + model_params.use_mmap = opt.mmap_on; struct llama_model* model = llama_load_model_from_file( opt.model_filename.c_str(), model_params); @@ -119,9 +120,10 @@ rendezllama::make_llama_context(rendezllama::ChatOptions& opt) llama_context_params ctx_params = llama_context_default_params(); ctx_params.n_ctx = opt.context_token_limit; ctx_params.seed = opt.seed; - ctx_params.rope_freq_scale = llama_rope_freq_scale_train(model); ctx_params.n_threads = opt.thread_count; ctx_params.n_batch = opt.batch_count; + ctx_params.rope_freq_scale =
llama_rope_freq_scale_train(model); + assert(ctx_params.rope_freq_scale > 0.0); while ( (unsigned)(opt.model_token_limit / ctx_params.rope_freq_scale) < @@ -130,14 +132,6 @@ rendezllama::make_llama_context(rendezllama::ChatOptions& opt) ctx_params.rope_freq_scale /= 2; } - llama_free_model(model); - model = llama_load_model_from_file( - opt.model_filename.c_str(), model_params); - if (!model) { - fildesh_log_error("Failed to open model."); - return std::make_tuple(nullptr, nullptr); - } - struct llama_context* ctx = llama_new_context_with_model(model, ctx_params); if (!ctx) { llama_free_model(model); diff --git a/src/language/inference.hh b/src/language/inference.hh index 7c2afaa..ea4819e 100644 --- a/src/language/inference.hh +++ b/src/language/inference.hh @@ -3,6 +3,7 @@ #include #include #include +#include <set> #include #include "llama.h" @@ -18,7 +19,7 @@ class Vocabulary; const std::string& antiprompt_suffix( std::string_view text, - const std::vector<std::string>& antiprompts); + const std::set<std::string>& antiprompts); void augment_tokenize_chat_input( ChatGuide& chat_guide, diff --git a/src/language/vocabulary.cc b/src/language/vocabulary.cc index f114e71..0510ec6 100644 --- a/src/language/vocabulary.cc +++ b/src/language/vocabulary.cc @@ -71,14 +71,22 @@ Vocabulary::detokenize_to(FildeshO* out, Token_id token_id) const const size_t attempt_size = allocated_size_of_FildeshO(out) - out->size; char* s = grow_FildeshO(out, attempt_size); - int n = llama_token_to_piece(model_, token_id, s, attempt_size); + int n = llama_token_to_piece( + model_, token_id, + s, attempt_size, + /*lstrip=*/0, + /*special=*/false); if (n >= 0) { out->size -= (attempt_size - n); } else { n = -n; out->size -= attempt_size; s = grow_FildeshO(out, n); + n = llama_token_to_piece( + model_, token_id, + s, n, + /*lstrip=*/0, + /*special=*/false); - n = llama_token_to_piece(model_, token_id, s, n); } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d054def..2e2a3fd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LlamaCpp_VOCAB_MODEL "${LlamaCpp_SOURCE_DIR}/models/ggml-vocab-llama.gguf") +set(LlamaCpp_VOCAB_MODEL "${LlamaCpp_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf") add_subdirectory(chat) add_subdirectory(example) diff --git a/test/chat/guide_test.cc b/test/chat/guide_test.cc index 2ba837f..1c7686a 100644 --- a/test/chat/guide_test.cc +++ b/test/chat/guide_test.cc @@ -47,14 +47,14 @@ the_test(llama_model* model) assert(guide.maybe_erase_trailing_message_prefix()); *in = FildeshX_of_strlit("\ - (((chat_prefixes))\n\ + ((chat_prefixes)\n\ (m (prefix \"A:\") (suffix \"\\n###\\n\"))\n\ (m (prefix \"B:\"))\n\ (m (prefix \"C:\"))\n\ (m (prefix \"D:\") (suffix \"\\n\"))\n\ )\n\ (substitution\n\ - (eos_token_alias \"</s>\")\n\ + (eos_token_alias \"</s>\")\n\ )\n\ "); good = rendezllama::slurp_sxpb_initialize_options_close_FildeshX(in, opt, ""); diff --git a/test/chat/opt_test.cc b/test/chat/opt_test.cc index da6a03e..db81aad 100644 --- a/test/chat/opt_test.cc +++ b/test/chat/opt_test.cc @@ -17,7 +17,7 @@ chat_prefixes_parse_test() bool all_good; *in = FildeshX_of_strlit( - "(((chat_prefixes)) \ + "((chat_prefixes) \ \"{{user}}:\" \ \"{{char}} feels:\" \ \"{{char}} wants:\" \ @@ -55,7 +55,7 @@ sentence_terminals_parse_test() rendezllama::ChatOptions opt; FildeshX in[1]; *in = FildeshX_of_strlit( - "((sentence_terminals) \ + "(sentence_terminals () \ \"\\n\" \ \"\\\"\" \ \".\" \ ") bool all_good = rendezllama::slurp_sxpb_dynamic_options_close_FildeshX(in, opt);
assert(all_good); assert(opt.sentence_terminals.size() == 3); - assert(opt.sentence_terminals[0] == "\n"); - assert(opt.sentence_terminals[1] == "\""); - assert(opt.sentence_terminals[2] == "."); + // Insert 3 and expect that they add nothing new. + opt.sentence_terminals.insert("\n"); + opt.sentence_terminals.insert("\""); + opt.sentence_terminals.insert("."); + assert(opt.sentence_terminals.size() == 3); } int main() diff --git a/test/example/prompt/CMakeLists.txt b/test/example/prompt/CMakeLists.txt index 694cd2d..d065d78 100644 --- a/test/example/prompt/CMakeLists.txt +++ b/test/example/prompt/CMakeLists.txt @@ -15,6 +15,10 @@ add_test(NAME example_prompt_assistant_chatml_parse_test COMMAND example_prompt_ "${PROJECT_SOURCE_DIR}/example/prompt/assistant_chatml/setting.sxpb") add_test(NAME example_prompt_assistant_coprocess_parse_test COMMAND example_prompt_parse_test "${PROJECT_SOURCE_DIR}/example/prompt/assistant_coprocess/setting.sxpb") +add_test(NAME example_prompt_assistant_gemma_parse_test COMMAND example_prompt_parse_test + "${PROJECT_SOURCE_DIR}/example/prompt/assistant_gemma/setting.sxpb") +add_test(NAME example_prompt_assistant_mistral_parse_test COMMAND example_prompt_parse_test + "${PROJECT_SOURCE_DIR}/example/prompt/assistant_mistral/setting.sxpb") add_test(NAME example_prompt_assistant_plain_parse_test COMMAND example_prompt_parse_test "${PROJECT_SOURCE_DIR}/example/prompt/assistant_plain/setting.sxpb") add_test(NAME example_prompt_assistant_vicuna_parse_test COMMAND example_prompt_parse_test
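For reviewers unfamiliar with the adapter API that the chat_main.cc hunk migrates to: the patch replaces the removed `llama_model_apply_lora_from_file()` with `llama_lora_adapter_init()`/`llama_lora_adapter_set()` from llama.cpp at the pinned tag. A minimal standalone sketch of that flow follows; the `apply_lora` helper name and the `"adapter.gguf"` path are illustrative only, not part of this patch.

```cpp
// Sketch of the llama.cpp LoRA-adapter flow used in chat_main.cc above,
// assuming a model and context already created as in make_llama_context().
#include <cstdio>
#include "llama.h"

static int apply_lora(llama_model* model, llama_context* ctx) {
  const float scale = 1.0f;
  // Load the adapter file against the base model; NULL means failure.
  llama_lora_adapter* lora =
      llama_lora_adapter_init(model, "adapter.gguf");
  if (!lora) {
    fprintf(stderr, "Failed to load LoRA adapter.\n");
    return 1;
  }
  // Attach the adapter to this context; nonzero status means failure.
  if (llama_lora_adapter_set(ctx, lora, scale) != 0) {
    llama_lora_adapter_free(lora);  // Detach and free on failure.
    return 1;
  }
  // On success the adapter stays attached; it is owned by the model
  // and released when the model is freed, so no explicit free here.
  return 0;
}
```

This mirrors the error handling in the chat_main.cc hunk, including freeing the adapter when `llama_lora_adapter_set()` fails.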