src: add initial kv-cache exploration code
Signed-off-by: Daniel Bevenius <[email protected]>
danbev committed Jun 23, 2024
1 parent ed8a14e commit cfd208b
Showing 2 changed files with 50 additions and 0 deletions.
3 changes: 3 additions & 0 deletions fundamentals/llama.cpp/Makefile
@@ -41,6 +41,9 @@ OBJS = llama.cpp/common.o \
llama.cpp/grammar-parser.o \
llama.cpp/json-schema-to-grammar.o

kv-cache: src/kv-cache.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(OBJS)

simple-prompt: src/simple-prompt.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(OBJS)

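With this target in place, the example can presumably be built from the fundamentals/llama.cpp directory with "make kv-cache" and then run as "./kv-cache", assuming the quantized model file referenced in src/kv-cache.cpp is available under models/.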
47 changes: 47 additions & 0 deletions fundamentals/llama.cpp/src/kv-cache.cpp
@@ -0,0 +1,47 @@
#include "llama.h"

#include <cstdio>
#include <string>
#include <cstdlib>
#include <vector>

int main(int argc, char** argv) {
    fprintf(stdout, "llama.cpp KV-Cache exploration\n");
    llama_model_params model_params = llama_model_default_params();

    model_params.main_gpu = 0;
    model_params.n_gpu_layers = 0;
    std::string model_path = "models/llama-2-13b-chat.Q4_0.gguf";
    fprintf(stdout, "llama.cpp example using model: %s\n", model_path.c_str());

    llama_backend_init();

    llama_model* model = llama_load_model_from_file(model_path.c_str(), model_params);
    if (model == NULL) {
        fprintf(stderr, "%s: error: failed to load model %s\n", __func__, model_path.c_str());
        return 1;
    }

    llama_context_params ctx_params = llama_context_default_params();
    ctx_params.seed = 1234;
    ctx_params.n_ctx = 1024;
    ctx_params.n_threads = 4;
    ctx_params.n_threads_batch = 4;
    ctx_params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR;

    llama_context * ctx = llama_new_context_with_model(model, ctx_params);
    if (ctx == NULL) {
        fprintf(stderr, "%s: error: failed to create the llama_context\n", __func__);
        return 1;
    }

    struct llama_kv_cache_view kv_view = llama_kv_cache_view_init(ctx, 1);
    printf("kv_view n_cells: %d\n", kv_view.n_cells);
    printf("kv_view n_seq_max: %d\n", kv_view.n_seq_max);

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();

    return 0;
}
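The commit only initializes the KV-cache view; llama.h at this point in the repository's history also exposes llama_kv_cache_view_update and llama_kv_cache_view_free. A minimal follow-up sketch (not part of this commit; field names such as used_cells and token_count should be verified against the checked-out llama.h) could be inserted before the cleanup calls:

    // Refresh the view after some tokens have been decoded, print a couple
    // of the counters it tracks, then release the memory owned by the view.
    llama_kv_cache_view_update(ctx, &kv_view);
    printf("used cells: %d, token count: %d\n", kv_view.used_cells, kv_view.token_count);
    llama_kv_cache_view_free(&kv_view);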
