Commit 8f65e12

Merge branch 'trunk' into twitch_sentiment

grencez committed Jul 24, 2024
2 parents 26b9747 + 54030b1

Showing 29 changed files with 195 additions and 99 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -8,6 +8,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_CXX_STANDARD 17)


+option(BUILD_SHARED_LIBS "Build using shared libraries" OFF)
 option(LLAMA_OPENBLAS_ON "llama: use OpenBLAS" OFF)


2 changes: 1 addition & 1 deletion dep/cmake_fetchcontent/fildesh.cmake
@@ -1,7 +1,7 @@
 FetchContent_Declare(
   Fildesh
   GIT_REPOSITORY "https://github.com/fildesh/fildesh.git"
-  GIT_TAG "9defdbec27700e01888d0834015e28947baba7cd"
+  GIT_TAG "e9421ebbb546cf38bf2051d02fdc38dcf71f8216"
 )
 FetchContent_MakeAvailable(Fildesh)
 set(Fildesh_INCLUDE_DIRS ${Fildesh_INCLUDE_DIRS} PARENT_SCOPE)
7 changes: 5 additions & 2 deletions dep/cmake_fetchcontent/llama_cpp.cmake
@@ -1,11 +1,14 @@
 FetchContent_Declare(
   LlamaCpp
   GIT_REPOSITORY "https://github.com/ggerganov/llama.cpp.git"
-  GIT_TAG "621e86b331f8b0e71f79fd82a4ae1cd54c3e4396"
+  GIT_TAG "50e05353e88d50b644688caa91f5955e8bdb9eb9"
 )

+set(GGML_OPENMP FALSE CACHE BOOL "OpenMP off for compatibility.")
 FetchContent_MakeAvailable(LlamaCpp)
+
+set(LlamaCpp_SOURCE_DIR "${llamacpp_SOURCE_DIR}" PARENT_SCOPE)
-set(LlamaCpp_INCLUDE_DIRS "${llamacpp_SOURCE_DIR}" PARENT_SCOPE)
+set(LlamaCpp_INCLUDE_DIRS "${llamacpp_SOURCE_DIR}/include" PARENT_SCOPE)
 set(LlamaCpp_LIBRARIES "$<TARGET_NAME:llama>" PARENT_SCOPE)

 if (LLAMA_OPENBLAS_ON)
3 changes: 0 additions & 3 deletions doc/setting/model.md
@@ -5,14 +5,11 @@ I prefer using flags to specify model files.
 - `--model ggml-model-q4_0.gguf` are the model weights. Usually quantized.
   - Required.
 - `--lora ggml-adapter-model.gguf` gives a LoRA.
-- `--lora_base ggml-model-f16.gguf` gives higher-precision model weights to apply the LoRA on top of.
-  - Not required when using `--lora` but you'll otherwise get a warning if the `--model` weights are low-precision.

 Even though the flags are preferred, `setting.sxpb` supports them too:
 ```lisp
 (model "ggml-model-q4_0.gguf")
 (lora "ggml-adapter-model.gguf")
-(lora_base "ggml-model-f16.gguf")
 ```

 ## Context
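
With `--lora_base` removed, the model-file options in `setting.sxpb` reduce to the pair below. A minimal sketch (filenames are the doc's own placeholders; since this commit also drops `lora_base` from the option schema, presumably the old field would no longer parse):

```lisp
; Minimal model setup after this change.
(model "ggml-model-q4_0.gguf")   ; Required weights, usually quantized.
(lora "ggml-adapter-model.gguf") ; Optional LoRA, now applied directly.
```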
11 changes: 5 additions & 6 deletions example/prompt/README.md
@@ -7,12 +7,11 @@ In order of interest:
   - Demonstrates why LLMs are hard to get right.
 - [confidant_alpaca](confidant_alpaca/): A camelid that occasionally spits.
   - Demonstrates a method of prompting instruction-tuned models to fill in character dialogue.
-- [assistant_alpaca](assistant_alpaca/): Instruction-following AI assistant.
-  - Minimal prompt that lets an Alpaca-style model do its thing.
-  - Only works with models that end the assistant's message with an EOS token.
-- [assistant_chatml](assistant_chatml/): Instruction-following AI assistant.
-  - Minimal prompt that lets a ChatML-style model do its thing.
-  - Only works with models that have special `<|im_start|>` and `<|im_end|>` tokens.
+- Instruction-following AI assistants.
+  - [assistant_alpaca](assistant_alpaca/): Alpaca prompt format.
+  - [assistant_chatml](assistant_chatml/): ChatML prompt format that requires special `<|im_start|>` and `<|im_end|>` tokens.
+  - [assistant_gemma](assistant_gemma/): Gemma prompt format that requires special `<start_of_turn>` and `<end_of_turn>` tokens.
+  - [assistant_mistral](assistant_mistral/): Mistral prompt format that requires special `[INST]` and `[/INST]` tokens.
 - [assistant_vicuna](assistant_vicuna/): Conversational AI assistant.
   - Minimal prompt that lets a Vicuna-style model do its thing.
   - Only works with models that end the assistant's message with an EOS token.
16 changes: 8 additions & 8 deletions example/prompt/assistant_alpaca/setting.sxpb
@@ -1,18 +1,18 @@
-(((chat_prefixes))
+((chat_prefixes)
 (m
-  (prefix "### Instruction:\n")
-  (suffix "\n\n"))
+ (prefix "### Instruction:\n")
+ (suffix "\n\n"))
 (m
-  (prefix "### Response:\n")
-  ; Model must be fine-tuned to end the response with EOS token.
-  (suffix "</s>\n\n")
+ (prefix "### Response:\n")
+ ; Model must be fine-tuned to end the response with EOS token.
+ (suffix "</s>\n\n")
 )
 )
 (substitution
-  (eos_token_alias "</s>")
+ (eos_token_alias "</s>")
 )
 ; Lines are considered as sentences.
-((sentence_terminals) "\n")
+(sentence_terminals () "\n")
 ; Max 10 lines at a time.
 (sentence_limit 10)
 (sentence_token_limit 1000)
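
The `chat_prefixes` list wraps each turn in a prefix/suffix pair. To make the effect concrete, a two-turn exchange under the settings above would serialize roughly as follows (the dialogue is invented for illustration; `</s>` stands in for the EOS token via `eos_token_alias`):

```
### Instruction:
What is a camelid?

### Response:
A camelid is a member of the camel family, such as a llama or an alpaca.</s>

```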
24 changes: 12 additions & 12 deletions example/prompt/assistant_chatml/setting.sxpb
@@ -1,20 +1,20 @@
-(((chat_prefixes))
+((chat_prefixes)
 (m
-  (prefix "<|im_start|>user\n")
-  (suffix "<|im_end|>\n"))
+ (prefix "<|im_start|>user\n")
+ (suffix "<|im_end|>\n"))
 (m
-  (prefix "<|im_start|>assistant\n")
-  (suffix "<|im_end|>\n")
+ (prefix "<|im_start|>assistant\n")
+ (suffix "<|im_end|>\n")
 )
 )
 (substitution
-  ; Uncomment the next 2 lines if your model doesn't support ChatML tokens.
-  ;(bos_token_alias "<|im_start|>")
-  ;(eos_token_alias "<|im_end|>")
-  ((special_tokens)
-    (() (name "<|im_start|>"))
-    (() (name "<|im_end|>"))
-  )
+ ; Uncomment the next 2 lines if your model doesn't support ChatML tokens.
+ ;(bos_token_alias "<|im_start|>")
+ ;(eos_token_alias "<|im_end|>")
+ (special_tokens (())
+  (() (name "<|im_start|>"))
+  (() (name "<|im_end|>"))
+ )
 )

 (x_priming "priming.txt")
2 changes: 1 addition & 1 deletion example/prompt/assistant_coprocess/setting.sxpb
@@ -1,7 +1,7 @@
 ; Bot name is replaced in the prompt.
 (confidant "Banterbot")
 (substitution
-  (confidant_alias "{{char}}")
+ (confidant_alias "{{char}}")
 )

 (thread_count 2)
4 changes: 4 additions & 0 deletions example/prompt/assistant_gemma/README.md
@@ -0,0 +1,4 @@
+# Gemma Assistant
+
+This example should be run with Gemma-style models that are tuned to behave like an instruction-following assistant chatbot.
+Most importantly, the model must have special `<start_of_turn>` and `<end_of_turn>` tokens.
2 changes: 2 additions & 0 deletions example/prompt/assistant_gemma/priming.txt
@@ -0,0 +1,2 @@
+<start_of_turn>user
+Hello!<end_of_turn>
31 changes: 31 additions & 0 deletions example/prompt/assistant_gemma/setting.sxpb
@@ -0,0 +1,31 @@
+(chat_prefixes (())
+ (m
+  (prefix "<start_of_turn>user\n")
+  (suffix "<end_of_turn>\n"))
+ (m
+  (prefix "<start_of_turn>assistant\n")
+  (suffix "<end_of_turn>\n")
+ )
+)
+(substitution
+ (special_tokens (())
+  (() (name "<start_of_turn>"))
+  (() (name "<end_of_turn>"))
+ )
+)
+
+(x_priming "priming.txt")
+(x_rolling "rolling.txt")
+(o_rolling "../../../bld/example/prompt/assistant_gemma.txt")
+
+; No starting space.
+(startspace_on +false)
+; No token penalization.
+(repeat_window 0)
+
+; 10 reasonably-long sentences at a time.
+(sentence_limit 10)
+(sentence_token_limit 100)
+
+; Limit context to avoid blowing up RAM on large context models.
+(model_token_limit 8000)
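
Given these prefixes, a hypothetical exchange would be laid out as below (the dialogue is invented for illustration; only the turn markers come from the settings above):

```
<start_of_turn>user
Hello!<end_of_turn>
<start_of_turn>assistant
Hi! How can I help you today?<end_of_turn>
```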
4 changes: 4 additions & 0 deletions example/prompt/assistant_mistral/README.md
@@ -0,0 +1,4 @@
+# Mistral Assistant
+
+This example should be run with Mistral-style models that are tuned to behave like an instruction-following assistant chatbot.
+Most importantly, the model must have special `[INST]` and `[/INST]` tokens.
1 change: 1 addition & 0 deletions example/prompt/assistant_mistral/priming.txt
@@ -0,0 +1 @@
+[INST]Hello![/INST]
38 changes: 38 additions & 0 deletions example/prompt/assistant_mistral/setting.sxpb
@@ -0,0 +1,38 @@
+(chat_prefixes (())
+ (m
+  (prefix "[INST]")
+  (suffix "[/INST]\n")
+ )
+ (m
+  (prefix "")
+  (suffix "</s>\n")
+ )
+)
+(substitution
+ (eos_token_alias "</s>")
+ (special_tokens (())
+  (() (name "[INST]"))
+  (() (name "[/INST]"))
+ )
+)
+
+(x_priming "priming.txt")
+(x_rolling "rolling.txt")
+(o_rolling "../../../bld/example/prompt/assistant_mistral.txt")
+
+; No starting space.
+(startspace_on +false)
+
+; 10 reasonably-long sentences at a time.
+(sentence_limit 10)
+(sentence_token_limit 100)
+
+; Limit context to avoid blowing up RAM on large context models.
+(model_token_limit 8000)
+
+; Match recommendation for Mistral NeMo v1.
+; https://build.nvidia.com/nv-mistralai/mistral-nemo-12b-instruct
+(mirostat 0)
+(repeat_window 0)
+(temperature 0.2)
+(top_p 0.7)
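
Unlike the other formats, the assistant turn here has an empty prefix and ends at the EOS alias, so a hypothetical exchange renders compactly (dialogue invented for illustration):

```
[INST]Hello![/INST]
Hi! How can I help you today?</s>
```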
4 changes: 2 additions & 2 deletions example/prompt/assistant_plain/setting.sxpb
@@ -1,8 +1,8 @@
 (protagonist "User")
 (confidant "Assistant")
 (substitution
-  (protagonist_alias "{{user}}")
-  (confidant_alias "{{char}}")
+ (protagonist_alias "{{user}}")
+ (confidant_alias "{{char}}")
 )
 (x_priming "../assistant_vicuna/priming.txt")
 (x_rolling "../assistant_vicuna/rolling.txt")
18 changes: 9 additions & 9 deletions example/prompt/assistant_vicuna/setting.sxpb
@@ -1,20 +1,20 @@
 (protagonist "USER")
 (confidant "ASSISTANT")
-(((chat_prefixes))
+((chat_prefixes)
 (m
-  (prefix "{{user}}:")
-  (suffix "</s>")
+ (prefix "{{user}}: ")
+ (suffix "</s>")
 )
 (m
-  (prefix " {{char}}:")
-  ; Model must be fine-tuned to end the response with EOS token.
-  (suffix "</s>")
+ (prefix " {{char}}:")
+ ; Model must be fine-tuned to end the response with EOS token.
+ (suffix "</s>")
 )
 )
 (substitution
-  (eos_token_alias "</s>")
-  (protagonist_alias "{{user}}")
-  (confidant_alias "{{char}}")
+ (eos_token_alias "</s>")
+ (protagonist_alias "{{user}}")
+ (confidant_alias "{{char}}")
 )
 (x_priming "priming.txt")
 (x_rolling "rolling.txt")
4 changes: 2 additions & 2 deletions example/prompt/roshambo_kira/setting.sxpb
@@ -1,8 +1,8 @@
 (protagonist "L")
 (confidant "Kira")
 (substitution
-  (protagonist_alias "{{user}}")
-  (confidant_alias "{{char}}")
+ (protagonist_alias "{{user}}")
+ (confidant_alias "{{char}}")
 )
 (x_priming "priming.txt")
 (x_rolling "rolling.txt")
18 changes: 9 additions & 9 deletions src/chat/chat_main.cc
@@ -92,16 +92,16 @@ int main(int argc, char** argv)
   }

   if (exstatus == 0 && !opt.lora_filename.empty()) {
-    const char* base_model_filename = NULL;
-    if (!opt.lora_base_model_filename.empty()) {
-      base_model_filename = opt.lora_base_model_filename.c_str();
-    }
     const float scale = 1.0f;
-    int istat = llama_model_apply_lora_from_file(
-        model, opt.lora_filename.c_str(), scale,
-        base_model_filename,
-        opt.thread_count);
-    if (istat != 0) {exstatus = 1;}
+    struct llama_lora_adapter* lora = llama_lora_adapter_init(
+        model, opt.lora_filename.c_str());
+    if (lora) {
+      int istat = llama_lora_adapter_set(ctx, lora, scale);
+      if (istat != 0) {
+        exstatus = 1;
+        llama_lora_adapter_free(lora);
+      }
+    }
   }

   Vocabulary vocabulary(model);
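
The hunk above migrates from the removed `llama_model_apply_lora_from_file()` to llama.cpp's newer adapter API, which no longer needs a higher-precision base model. A standalone sketch of the same flow (the llama.cpp calls and their signatures are taken from the diff itself; the helper function is illustrative):

```cpp
#include "llama.h"

// Sketch: parse a GGUF LoRA adapter and activate it on a context.
// Returns false on failure so the caller can set its own exit status,
// mirroring the exstatus handling above.
static bool attach_lora(struct llama_model* model,
                        struct llama_context* ctx,
                        const char* lora_filename,
                        float scale) {
  struct llama_lora_adapter* lora =
      llama_lora_adapter_init(model, lora_filename);
  if (!lora) {
    return false;  // Unreadable or incompatible adapter file.
  }
  if (llama_lora_adapter_set(ctx, lora, scale) != 0) {
    llama_lora_adapter_free(lora);  // Detach and release on failure.
    return false;
  }
  // On success the adapter stays attached; llama.cpp releases it
  // together with the model.
  return true;
}
```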
18 changes: 4 additions & 14 deletions src/chat/opt.cc
@@ -148,7 +148,7 @@ static void reinitialize_chat_prefixes(ChatOptions& opt) {
       opt.message_opts[0].prefix += ' ';
       opt.message_opts[1].prefix += ' ';
     }
-    opt.message_opts[0].prefix += opt.protagonist + ':';
+    opt.message_opts[0].prefix += opt.protagonist + ": ";
     opt.message_opts[1].prefix += opt.confidant + ':';
   }
   for (auto& message_opt : opt.message_opts) {
@@ -238,7 +238,7 @@ rendezllama::parse_options(rendezllama::ChatOptions& opt, int argc, char** argv)
   opt.seed = INT_MAX & time(NULL);

   opt.antiprompts = opt.sentence_terminals;
-  opt.antiprompts.push_back("\n");
+  opt.antiprompts.insert("\n");

   for (argi = 1; exstatus == 0 && argi < argc; ++argi) {
     if (false) {
@@ -261,13 +261,6 @@ rendezllama::parse_options(rendezllama::ChatOptions& opt, int argc, char** argv)
     else if (0 == strcmp("--lora", argv[argi])) {
       argi += 1;
       opt.lora_filename = argv[argi];
-      opt.mmap_on = false; // mmap() is incompatible.
-    }
-    else if (0 == strcmp("--lora_base_model", argv[argi]) ||
-             0 == strcmp("--lora_base", argv[argi]) ||
-             0 == strcmp("--lora-base", argv[argi])) {
-      argi += 1;
-      opt.lora_base_model_filename = argv[argi];
     }
     else if (0 == strcmp("--x_setting", argv[argi])) {
       argi += 1;
@@ -489,9 +482,6 @@ slurp_sxpb_options_close_FildeshX(
     opt.mmap_on = false; // mmap() is incompatible.
   }

-  lone_subfield_at_FildeshSxpb_to_cc_string(
-      &opt.lora_base_model_filename, sxpb, top_it, "lora_base_model");
-
   if (lone_subfield_at_FildeshSxpb_to_str(&s, sxpb, top_it, "x_rolling")) {
     FildeshX* rolling_in = open_sibling_FildeshXF(sxpb_filename.c_str(), s);
     parse_rolling_prompt(rolling_in, opt);
@@ -590,13 +580,13 @@ slurp_sxpb_options_close_FildeshX(
     for (it = first_at_FildeshSxpb(sxpb, it); !nullish_FildeshSxpbIT(it);
          it = next_at_FildeshSxpb(sxpb, it)) {
       s = str_value_at_FildeshSxpb(sxpb, it);
-      opt.sentence_terminals.push_back(s);
+      opt.sentence_terminals.insert(s);
       if (s[0] == '\n' && s[1] == '\0') {found = true;}
     }

     opt.antiprompts = opt.sentence_terminals;
     if (!found) {
-      opt.antiprompts.push_back("\n");
+      opt.antiprompts.insert("\n");
     }
   }

6 changes: 3 additions & 3 deletions src/chat/opt.hh
@@ -3,6 +3,7 @@

 #include <string>
 #include <ostream>
+#include <set>
 #include <vector>

 struct FildeshX;
@@ -29,7 +30,6 @@ struct ChatOptions {
   std::vector<ChatMessageOpt> message_opts;
   std::string model_filename;
   std::string lora_filename;
-  std::string lora_base_model_filename;
   std::string transcript_sibling_filename;
   std::string transcript_filename;

@@ -69,8 +69,8 @@ struct ChatOptions {
   bool mlock_on = false;
   bool mmap_on = true;
   bool coprocess_mode_on = false;
-  std::vector<std::string> sentence_terminals = {"!", ".", "?", ""};
-  std::vector<std::string> antiprompts;
+  std::set<std::string> sentence_terminals = {"!", ".", "?", ""};
+  std::set<std::string> antiprompts;
   // Can't set these yet.
   bool verbose_prompt = false;
 };
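
The switch from `std::vector` to `std::set` above pairs with the `push_back`-to-`insert` changes in `opt.cc`: set insertion is idempotent and deduplicating, so adding `"\n"` as an antiprompt repeatedly can never create duplicate entries. A minimal standalone illustration (not from the repo; defaults abridged):

```cpp
#include <cassert>
#include <set>
#include <string>

int main() {
  // Abridged version of the ChatOptions defaults above.
  std::set<std::string> sentence_terminals = {"!", ".", "?"};

  // opt.cc copies the terminals, then ensures "\n" is an antiprompt.
  std::set<std::string> antiprompts = sentence_terminals;
  antiprompts.insert("\n");
  antiprompts.insert("\n");  // Repeated insert is a no-op for a set.

  assert(antiprompts.count("\n") == 1);  // A vector would now hold it twice.
  return 0;
}
```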
2 changes: 0 additions & 2 deletions src/chat/opt_schema.cc
@@ -34,8 +34,6 @@ rendezllama::options_sxproto_schema()
     {"coprocess_mode_on", FILL_DEFAULT_FildeshSxprotoField_BOOL},
     {"frequency_penalty", FILL_DEFAULT_FildeshSxprotoField_FLOAT},
     {"linespace_on", FILL_DEFAULT_FildeshSxprotoField_BOOL},
-    {"lora_base_model", FILL_FildeshSxprotoField_STRING(1, FILENAME_MAX)},
-    {"lora_base", FILL_DEFAULT_FildeshSxprotoField_ALIAS},
     {"lora", FILL_FildeshSxprotoField_STRING(1, FILENAME_MAX)},
     {"min_p", FILL_DEFAULT_FildeshSxprotoField_FLOAT},
     {"mirostat", FILL_FildeshSxprotoField_INT(0, 2)},