Skip to content

Commit

Permalink
ggml: update ggml submodule
Browse files Browse the repository at this point in the history
  • Loading branch information
danbev committed Nov 20, 2024
1 parent 3e087d6 commit 0478f68
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 19 deletions.
8 changes: 6 additions & 2 deletions fundamentals/ggml/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,15 @@ TARGETS_CPP := $(patsubst $(SRCDIR)/%.cpp, %, $(SOURCES_CPP))
ifeq ($(OS),Linux)
CFLAGS += -fopenmp
CXXFLAGS += -fopenmp
WHOLE_ARCHIVE = -Wl,--whole-archive ggml/build/src/libggml.a -Wl,--no-whole-archive
WHOLE_ARCHIVE = -Wl,--whole-archive ggml/build/src/libggml.a -Wl,--no-whole-archive
WHOLE_ARCHIVE += -Wl,--whole-archive ggml/build/src/libggml-base.a -Wl,--no-whole-archive
WHOLE_ARCHIVE += -Wl,--whole-archive ggml/build/src/ggml-cpu/libggml-cpu.a -Wl,--no-whole-archive
else ifeq ($(OS),Darwin)
CFLAGS += -framework Metal -framework Foundation -framework MetalKit -framework Accelerate
CXXFLAGS += -framework Metal -framework Foundation -framework MetalKit -framework Accelerate
WHOLE_ARCHIVE = -Wl,-force_load,ggml/build/src/libggml.a
WHOLE_ARCHIVE = -Wl,-force_load,ggml/build/src/libggml.a
WHOLE_ARCHIVE += -Wl,-force_load,ggml/build/src/libggml-base.a
WHOLE_ARCHIVE += -Wl,-force_load,ggml/build/src/libggml-cpu.a
endif

.PHONY: all clean
Expand Down
2 changes: 1 addition & 1 deletion fundamentals/ggml/ggml
Submodule ggml updated from 8a3d79 to 2884dd
25 changes: 14 additions & 11 deletions fundamentals/ggml/src/backprop.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ int main(int argc, char **argv) {
.mem_buffer = NULL,
};
struct ggml_context* ctx = ggml_init(params);
struct ggml_context* static_ctx = ggml_init(params);

// 'a' represents a parameter in the graph/neural network
struct ggml_tensor* a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
Expand All @@ -29,8 +30,8 @@ int main(int argc, char **argv) {

printf("Parameter a:\n");
printf("a: %f\n", ggml_get_f32_1d(a, 0));
printf("a->grad: %s\n", a->grad->name);
printf("a->grad: %f\n", ggml_get_f32_1d(a->grad, 0));
//printf("a->grad: %s\n", ggml_graph_get_grad(b_graph, a->grad->name));
//printf("a->grad: %f\n", ggml_get_f32_1d(a->grad, 0));
printf("\n");

// 'b' represents another parameter in the graph/neural network
Expand All @@ -41,8 +42,8 @@ int main(int argc, char **argv) {
ggml_set_param(ctx, b);
printf("Parameter b:\n");
printf("b: %f\n", ggml_get_f32_1d(b, 0));
printf("b->grad: %s\n", b->grad->name);
printf("b->grad: %f\n", ggml_get_f32_1d(b->grad, 0));
//printf("b->grad: %s\n", b->grad->name);
//printf("b->grad: %f\n", ggml_get_f32_1d(b->grad, 0));
printf("\n");

printf("Operation/Output tensor mul:\n");
Expand All @@ -51,7 +52,7 @@ int main(int argc, char **argv) {
printf("mul->op: %s\n", ggml_op_name(mul->op));
printf("mul->src0: %s\n", mul->src[0]->name);
printf("mul->src1: %s\n", mul->src[1]->name);
printf("mul->grad: %s\n", mul->grad->name);
//printf("mul->grad: %s\n", mul->grad->name);
printf("\n");

struct ggml_cgraph* f_graph = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
Expand All @@ -74,25 +75,27 @@ int main(int argc, char **argv) {
ggml_graph_reset(f_graph);

struct ggml_cgraph* b_graph = ggml_graph_dup(ctx, f_graph);
ggml_build_backward_expand(ctx, f_graph, b_graph, /*accumulate*/ false);
ggml_build_backward_expand(static_ctx, ctx, b_graph, /*accumulate*/ false);
ggml_graph_print(b_graph);

// Set the gradient of the output tensor (mul) which would be the value of
// the loss function.
ggml_set_f32(mul->grad, 2.0f);
const ggml_tensor* a_grad = ggml_graph_get_grad(b_graph, a);
printf("a->grad: %f\n", ggml_get_f32_1d(a_grad, 0));
ggml_set_f32(mul, 2.0f);
// Compute the gradients
printf("[Perform backward pass]\n\n");
ggml_graph_compute_with_ctx(ctx, b_graph, 1);

printf("Updated gradients:\n");
printf("a->grad: %f\n", ggml_get_f32_1d(a->grad, 0));
printf("b->grad: %f\n", ggml_get_f32_1d(b->grad, 0));
//printf("a->grad: %f\n", ggml_get_f32_1d(static_ctx, ggml_graph_get_grad(b_graph, a)));
//printf("b->grad: %f\n", ggml_get_f32_1d(b->grad, 0));
printf("\n");

// Now, a and b values would be updated using the gradients computed above.
float learning_rate = 0.01;
ggml_set_f32_1d(a, 0, ggml_get_f32_1d(a, 0) - learning_rate * ggml_get_f32_1d(a->grad, 0));
ggml_set_f32_1d(b, 0, ggml_get_f32_1d(b, 0) - learning_rate * ggml_get_f32_1d(b->grad, 0));
//ggml_set_f32_1d(a, 0, ggml_get_f32_1d(a, 0) - learning_rate * ggml_get_f32_1d(a->grad, 0));
//ggml_set_f32_1d(b, 0, ggml_get_f32_1d(b, 0) - learning_rate * ggml_get_f32_1d(b->grad, 0));

printf("Updated parameters a and b:\n");
printf("a: %f\n", ggml_get_f32_1d(a, 0));
Expand Down
45 changes: 45 additions & 0 deletions fundamentals/ggml/src/device.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <stdio.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

// Print diagnostic information about a backend buffer and its buffer type.
// NOTE(review): the ctx parameter is currently unused — kept so existing
// callers keep compiling; confirm whether it can be dropped.
void print_backend_info(ggml_backend_buffer_t buffer, struct ggml_context* ctx) {
    printf("------- device info -------\n");
    printf("buffer name: %s\n", ggml_backend_buffer_name(buffer));
    // The size/alignment/max-size getters return size_t, so the correct
    // printf specifier is %zu (%ld is only correct where long == size_t).
    printf("buffer size: %zu\n", ggml_backend_buffer_get_size(buffer));
    printf("buffer alignment: %zu\n", ggml_backend_buffer_get_alignment(buffer));
    printf("buffer max size: %zu\n", ggml_backend_buffer_get_max_size(buffer));
    printf("buffer is host: %d\n", ggml_backend_buffer_is_host(buffer));

    // The buffer type describes properties shared by all buffers of this kind.
    ggml_backend_buffer_type_t buffer_type = ggml_backend_buffer_get_type(buffer);
    printf("buffer type name: %s\n", ggml_backend_buft_name(buffer_type));
    printf("buffer type alignment: %zu\n", ggml_backend_buft_get_alignment(buffer_type));
    printf("buffer type max size: %zu\n", ggml_backend_buft_get_max_size(buffer_type));
    printf("buffer type is host: %d\n", ggml_backend_buft_is_host(buffer_type));
}

// Example program: enumerate ggml backend devices, initialize a backend for
// the first device, and allocate a small buffer from its default buffer type.
int main(int argc, char **argv) {
    printf("GGML device examples\n");
    // ggml_backend_dev_count returns size_t — print with %zu, not %ld.
    size_t device_count = ggml_backend_dev_count();
    printf("device count: %zu\n", device_count);
    if (device_count == 0) {
        fprintf(stderr, "no backend devices available\n");
        return 1;
    }

    ggml_backend_dev_t device = ggml_backend_dev_get(0);
    printf("device name: %s\n", ggml_backend_dev_name(device));
    printf("device description: %s\n", ggml_backend_dev_description(device));

    enum ggml_backend_dev_type type = ggml_backend_dev_type(device);
    ggml_backend_t backend = ggml_backend_init_by_type(type, NULL);
    // Guard against init failure before passing backend to other APIs.
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize backend for device type %d\n", (int) type);
        return 1;
    }
    printf("backend name: %s\n", ggml_backend_name(backend));
    if (type == GGML_BACKEND_DEVICE_TYPE_CPU) {
        printf("backend type: GGML_BACKEND_DEVICE_TYPE_CPU\n");
    }

    ggml_backend_buffer_type_t buf_type = ggml_backend_get_default_buffer_type(backend);
    printf("buffer type name: %s\n", ggml_backend_buft_name(buf_type));

    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buf_type, 100);
    if (buffer == NULL) {
        fprintf(stderr, "failed to allocate buffer\n");
        ggml_backend_free(backend);
        return 1;
    }
    printf("buffer name: %s\n", ggml_backend_buffer_name(buffer));

    // Release resources acquired above (the original leaked both).
    ggml_backend_buffer_free(buffer);
    ggml_backend_free(backend);

    return 0;
}
2 changes: 1 addition & 1 deletion fundamentals/ggml/src/ggml-quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ int main(int argc, char **argv) {
printf("ggml type trait is_quantized: %s\n", q4_0->is_quantized ? "true" : "false");
// The type trait contains function pointers to the quantize and dequantize
// functions
q4_0->from_float(data, &block_q4_0, 32);
q4_0->from_float_ref(data, &block_q4_0, 32);
for (int i = 0; i < QK4_0/2; i++) {
printf("block_q4_0.qs[%d]: %d\n", i, block_q4_0.qs[i]);
}
Expand Down
4 changes: 4 additions & 0 deletions fundamentals/ggml/src/opt-adam.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdio.h>

#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

Expand All @@ -13,6 +14,8 @@ int main(int argc, char **argv) {
};
struct ggml_context* ctx = ggml_init(params);

/* TODO: revisit this code after reading up on the new optimizer API
struct ggml_opt_params opts = ggml_opt_default_params(GGML_OPT_TYPE_ADAM);
struct ggml_tensor* a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
ggml_set_param(ctx, a);
Expand All @@ -25,6 +28,7 @@ int main(int argc, char **argv) {
ggml_graph_compute_with_ctx(ctx, cgraph, 1);
printf("a: n_elements: %ld\n", ggml_nelements(a));
*/

ggml_free(ctx);
return 0;
Expand Down
3 changes: 3 additions & 0 deletions fundamentals/ggml/src/opt-lbfgs.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ int main(int argc, char **argv) {
};
struct ggml_context* ctx = ggml_init(params);

/* TODO: revisit this code after reading up on the new optimizer API
// Simulate a sequence of 6 tokens with an embedding size of 4096 and a
// context length of 512.
int n_ctx_orig = 4096;
Expand Down Expand Up @@ -119,6 +121,7 @@ int main(int argc, char **argv) {
ggml_graph_compute_with_ctx(ctx, cgraph, 1);
//printf("a: n_elements: %ld\n", ggml_nelements(s));
*/

ggml_free(ctx);
return 0;
Expand Down
1 change: 0 additions & 1 deletion fundamentals/ggml/src/outer-product.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ int main(int argc, char **argv) {
ggml_set_name(out, "out_prod");
printf("out->src0: %s\n", out->src[0]->name);
printf("out->src1: %s\n", out->src[1]->name);
printf("out->grad: %s\n", out->grad->name);
printf("out->ne[0]: %ld\n", out->ne[0]);
printf("out->ne[1]: %ld\n", out->ne[1]);
printf("\n");
Expand Down
1 change: 1 addition & 0 deletions fundamentals/ggml/src/softmax.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"

int main(int argc, char **argv) {
printf("GGML softmax example\n");
Expand Down
4 changes: 1 addition & 3 deletions fundamentals/ggml/src/tensor.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ int main(int argc, char **argv) {
// created by a + b = c, c being the tensor then the op would be GGML_OP_ADD.
printf("x tensor operation: %s, %s\n", ggml_op_name(x->op), ggml_op_symbol(x->op));
// ggml_tensor's are used as the base unit values in the library, similar to
// the Value struct in the LLM zero-to-hero tutorial. These values support
// automatic differentiation, so they have a grad field.
printf("x tensor grad: %p\n", x->grad);
// the Value struct in the LLM zero-to-hero tutorial.
// src are the values that were used to create the tensor, for example if the
// tensor was created by a + b = c, then the src would be a and b.
printf("x tensor src: %p\n", x->src);
Expand Down

0 comments on commit 0478f68

Please sign in to comment.