llama : mmap

ggml-ci
ggerganov · Dec 22, 2024 · 1b790de
1 parent 8233c18
commit 1b790de
Show file tree

Hide file tree

Showing 10 changed files with 648 additions and 592 deletions.
diff --git a/src/llama-adapter.h b/src/llama-adapter.h
@@ -7,6 +7,7 @@
 
 #include <vector>
 #include <map>
+#include <algorithm>
 
 //
 // llama_adapter_vec

diff --git a/src/llama-arch.h b/src/llama-arch.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <map>
+#include <string>
 
 //
 // gguf constants (sync with gguf.py)

diff --git a/src/llama-batch.h b/src/llama-batch.h
@@ -3,6 +3,8 @@
 #include "llama.h"
 
 #include <vector>
+#include <cstring>
+#include <algorithm>
 
 // very similar to llama_batch,
 // but has more metadata about sequences

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
@@ -799,7 +799,7 @@ static bool llama_state_load_file_internal(struct llama_context * ctx, const cha
 
     // restore the context state
     {
-        const size_t n_state_size_cur = file.size - file.tell();
+        const size_t n_state_size_cur = file.size() - file.tell();
 
         llama_data_read_file data_ctx(&file);
         const size_t n_read = llama_state_set_data_internal(ctx, data_ctx);
@@ -936,7 +936,7 @@ static size_t llama_state_seq_load_file_internal(struct llama_context * ctx, con
 
     // restore the context state
     {
-        const size_t state_size = file.size - file.tell();
+        const size_t state_size = file.size() - file.tell();
         llama_data_read_file data_ctx(&file);
         const size_t nread = llama_state_seq_set_data_internal(ctx, data_ctx, dest_seq_id);
         if (!nread) {

diff --git a/src/llama-impl.h b/src/llama-impl.h
@@ -24,6 +24,7 @@ LLAMA_ATTRIBUTE_FORMAT(2, 3)
 void llama_log_internal        (ggml_log_level level, const char * format, ...);
 void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
 
+// TODO: rename to llama_format ?
 LLAMA_ATTRIBUTE_FORMAT(1, 2)
 std::string format(const char * fmt, ...);
 

diff --git a/src/llama-kv-cache.h b/src/llama-kv-cache.h
@@ -8,6 +8,7 @@
 
 #include <set>
 #include <vector>
+#include <limits>
 
 struct llama_kv_cell {
     llama_pos pos   = -1;