Skip to content

Commit

Permalink
ggml: update ggml submodule
Browse files Browse the repository at this point in the history
  • Loading branch information
danbev committed Nov 20, 2024
1 parent 3e087d6 commit 0478f68
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 19 deletions.
8 changes: 6 additions & 2 deletions fundamentals/ggml/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,15 @@ TARGETS_CPP := $(patsubst $(SRCDIR)/%.cpp, %, $(SOURCES_CPP))
ifeq ($(OS),Linux)
CFLAGS += -fopenmp
CXXFLAGS += -fopenmp
WHOLE_ARCHIVE = -Wl,--whole-archive ggml/build/src/libggml.a -Wl,--no-whole-archive
WHOLE_ARCHIVE = -Wl,--whole-archive ggml/build/src/libggml.a -Wl,--no-whole-archive
WHOLE_ARCHIVE += -Wl,--whole-archive ggml/build/src/libggml-base.a -Wl,--no-whole-archive
WHOLE_ARCHIVE += -Wl,--whole-archive ggml/build/src/ggml-cpu/libggml-cpu.a -Wl,--no-whole-archive
else ifeq ($(OS),Darwin)
CFLAGS += -framework Metal -framework Foundation -framework MetalKit -framework Accelerate
CXXFLAGS += -framework Metal -framework Foundation -framework MetalKit -framework Accelerate
WHOLE_ARCHIVE = -Wl,-force_load,ggml/build/src/libggml.a
WHOLE_ARCHIVE = -Wl,-force_load,ggml/build/src/libggml.a
WHOLE_ARCHIVE += -Wl,-force_load,ggml/build/src/libggml-base.a
WHOLE_ARCHIVE += -Wl,-force_load,ggml/build/src/libggml-cpu.a
endif

.PHONY: all clean
Expand Down
2 changes: 1 addition & 1 deletion fundamentals/ggml/ggml
Submodule ggml updated from 8a3d79 to 2884dd
25 changes: 14 additions & 11 deletions fundamentals/ggml/src/backprop.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ int main(int argc, char **argv) {
.mem_buffer = NULL,
};
struct ggml_context* ctx = ggml_init(params);
struct ggml_context* static_ctx = ggml_init(params);

// 'a' represents a parameter in the graph/neural network
struct ggml_tensor* a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
Expand All @@ -29,8 +30,8 @@ int main(int argc, char **argv) {

printf("Parameter a:\n");
printf("a: %f\n", ggml_get_f32_1d(a, 0));
printf("a->grad: %s\n", a->grad->name);
printf("a->grad: %f\n", ggml_get_f32_1d(a->grad, 0));
//printf("a->grad: %s\n", ggml_graph_get_grad(b_graph, a->grad->name));
//printf("a->grad: %f\n", ggml_get_f32_1d(a->grad, 0));
printf("\n");

// 'b' represents another parameter in the graph/neural network
Expand All @@ -41,8 +42,8 @@ int main(int argc, char **argv) {
ggml_set_param(ctx, b);
printf("Parameter b:\n");
printf("b: %f\n", ggml_get_f32_1d(b, 0));
printf("b->grad: %s\n", b->grad->name);
printf("b->grad: %f\n", ggml_get_f32_1d(b->grad, 0));
//printf("b->grad: %s\n", b->grad->name);
//printf("b->grad: %f\n", ggml_get_f32_1d(b->grad, 0));
printf("\n");

printf("Operation/Output tensor mul:\n");
Expand All @@ -51,7 +52,7 @@ int main(int argc, char **argv) {
printf("mul->op: %s\n", ggml_op_name(mul->op));
printf("mul->src0: %s\n", mul->src[0]->name);
printf("mul->src1: %s\n", mul->src[1]->name);
printf("mul->grad: %s\n", mul->grad->name);
//printf("mul->grad: %s\n", mul->grad->name);
printf("\n");

struct ggml_cgraph* f_graph = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
Expand All @@ -74,25 +75,27 @@ int main(int argc, char **argv) {
ggml_graph_reset(f_graph);

struct ggml_cgraph* b_graph = ggml_graph_dup(ctx, f_graph);
ggml_build_backward_expand(ctx, f_graph, b_graph, /*accumulate*/ false);
ggml_build_backward_expand(static_ctx, ctx, b_graph, /*accumulate*/ false);
ggml_graph_print(b_graph);

// Set the gradient of the output tensor (mul) which would be the value of
// the loss function.
ggml_set_f32(mul->grad, 2.0f);
const ggml_tensor* a_grad = ggml_graph_get_grad(b_graph, a);
printf("a->grad: %f\n", ggml_get_f32_1d(a_grad, 0));
ggml_set_f32(mul, 2.0f);
// Compute the gradients
printf("[Perform backward pass]\n\n");
ggml_graph_compute_with_ctx(ctx, b_graph, 1);

printf("Updated gradients:\n");
printf("a->grad: %f\n", ggml_get_f32_1d(a->grad, 0));
printf("b->grad: %f\n", ggml_get_f32_1d(b->grad, 0));
//printf("a->grad: %f\n", ggml_get_f32_1d(static_ctx, ggml_graph_get_grad(b_graph, a)));
//printf("b->grad: %f\n", ggml_get_f32_1d(b->grad, 0));
printf("\n");

// Now, a and b values would be updated using the gradients computed above.
float learning_rate = 0.01;
ggml_set_f32_1d(a, 0, ggml_get_f32_1d(a, 0) - learning_rate * ggml_get_f32_1d(a->grad, 0));
ggml_set_f32_1d(b, 0, ggml_get_f32_1d(b, 0) - learning_rate * ggml_get_f32_1d(b->grad, 0));
//ggml_set_f32_1d(a, 0, ggml_get_f32_1d(a, 0) - learning_rate * ggml_get_f32_1d(a->grad, 0));
//ggml_set_f32_1d(b, 0, ggml_get_f32_1d(b, 0) - learning_rate * ggml_get_f32_1d(b->grad, 0));

printf("Updated parameters a and b:\n");
printf("a: %f\n", ggml_get_f32_1d(a, 0));
Expand Down
45 changes: 45 additions & 0 deletions fundamentals/ggml/src/device.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <stdio.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

// Print diagnostic information about a backend buffer and its buffer type.
// NOTE(review): the ctx parameter is currently unused — kept so existing
// callers keep compiling; confirm whether it can be dropped.
void print_backend_info(ggml_backend_buffer_t buffer, struct ggml_context* ctx) {
    printf("------- device info -------\n");
    printf("buffer name: %s\n", ggml_backend_buffer_name(buffer));
    // The size/alignment/max-size getters return size_t, so the correct
    // printf specifier is %zu (%ld is only correct where long == size_t).
    printf("buffer size: %zu\n", ggml_backend_buffer_get_size(buffer));
    printf("buffer alignment: %zu\n", ggml_backend_buffer_get_alignment(buffer));
    printf("buffer max size: %zu\n", ggml_backend_buffer_get_max_size(buffer));
    printf("buffer is host: %d\n", ggml_backend_buffer_is_host(buffer));

    // The buffer type describes properties shared by all buffers of this kind.
    ggml_backend_buffer_type_t buffer_type = ggml_backend_buffer_get_type(buffer);
    printf("buffer type name: %s\n", ggml_backend_buft_name(buffer_type));
    printf("buffer type alignment: %zu\n", ggml_backend_buft_get_alignment(buffer_type));
    printf("buffer type max size: %zu\n", ggml_backend_buft_get_max_size(buffer_type));
    printf("buffer type is host: %d\n", ggml_backend_buft_is_host(buffer_type));
}

// Example program: enumerate ggml backend devices, initialize a backend for
// the first device, and allocate a small buffer from its default buffer type.
int main(int argc, char **argv) {
    printf("GGML device examples\n");
    // ggml_backend_dev_count returns size_t — print with %zu, not %ld.
    size_t device_count = ggml_backend_dev_count();
    printf("device count: %zu\n", device_count);
    if (device_count == 0) {
        fprintf(stderr, "no backend devices available\n");
        return 1;
    }

    ggml_backend_dev_t device = ggml_backend_dev_get(0);
    printf("device name: %s\n", ggml_backend_dev_name(device));
    printf("device description: %s\n", ggml_backend_dev_description(device));

    enum ggml_backend_dev_type type = ggml_backend_dev_type(device);
    ggml_backend_t backend = ggml_backend_init_by_type(type, NULL);
    // Guard against init failure before passing backend to other APIs.
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize backend for device type %d\n", (int) type);
        return 1;
    }
    printf("backend name: %s\n", ggml_backend_name(backend));
    if (type == GGML_BACKEND_DEVICE_TYPE_CPU) {
        printf("backend type: GGML_BACKEND_DEVICE_TYPE_CPU\n");
    }

    ggml_backend_buffer_type_t buf_type = ggml_backend_get_default_buffer_type(backend);
    printf("buffer type name: %s\n", ggml_backend_buft_name(buf_type));

    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buf_type, 100);
    if (buffer == NULL) {
        fprintf(stderr, "failed to allocate buffer\n");
        ggml_backend_free(backend);
        return 1;
    }
    printf("buffer name: %s\n", ggml_backend_buffer_name(buffer));

    // Release resources acquired above (the original leaked both).
    ggml_backend_buffer_free(buffer);
    ggml_backend_free(backend);

    return 0;
}
2 changes: 1 addition & 1 deletion fundamentals/ggml/src/ggml-quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ int main(int argc, char **argv) {
printf("ggml type trait is_quantized: %s\n", q4_0->is_quantized ? "true" : "false");
// The type trait contains function pointers to the quantize and dequantize
// functions
q4_0->from_float(data, &block_q4_0, 32);
q4_0->from_float_ref(data, &block_q4_0, 32);
for (int i = 0; i < QK4_0/2; i++) {
printf("block_q4_0.qs[%d]: %d\n", i, block_q4_0.qs[i]);
}
Expand Down
4 changes: 4 additions & 0 deletions fundamentals/ggml/src/opt-adam.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdio.h>

#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

Expand All @@ -13,6 +14,8 @@ int main(int argc, char **argv) {
};
struct ggml_context* ctx = ggml_init(params);

/* TODO: revisit this code after reading up on the new optimizer API
struct ggml_opt_params opts = ggml_opt_default_params(GGML_OPT_TYPE_ADAM);
struct ggml_tensor* a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
ggml_set_param(ctx, a);
Expand All @@ -25,6 +28,7 @@ int main(int argc, char **argv) {
ggml_graph_compute_with_ctx(ctx, cgraph, 1);
printf("a: n_elements: %ld\n", ggml_nelements(a));
*/

ggml_free(ctx);
return 0;
Expand Down
3 changes: 3 additions & 0 deletions fundamentals/ggml/src/opt-lbfgs.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ int main(int argc, char **argv) {
};
struct ggml_context* ctx = ggml_init(params);

/* TODO: revisit this code after reading up on the new optimizer API
// Simulate a sequence of 6 tokens with an embedding size of 4096 and a
// context length of 512.
int n_ctx_orig = 4096;
Expand Down Expand Up @@ -119,6 +121,7 @@ int main(int argc, char **argv) {
ggml_graph_compute_with_ctx(ctx, cgraph, 1);
//printf("a: n_elements: %ld\n", ggml_nelements(s));
*/

ggml_free(ctx);
return 0;
Expand Down
1 change: 0 additions & 1 deletion fundamentals/ggml/src/outer-product.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ int main(int argc, char **argv) {
ggml_set_name(out, "out_prod");
printf("out->src0: %s\n", out->src[0]->name);
printf("out->src1: %s\n", out->src[1]->name);
printf("out->grad: %s\n", out->grad->name);
printf("out->ne[0]: %ld\n", out->ne[0]);
printf("out->ne[1]: %ld\n", out->ne[1]);
printf("\n");
Expand Down
1 change: 1 addition & 0 deletions fundamentals/ggml/src/softmax.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"

int main(int argc, char **argv) {
printf("GGML softmax example\n");
Expand Down
4 changes: 1 addition & 3 deletions fundamentals/ggml/src/tensor.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ int main(int argc, char **argv) {
// created by a + b = c, c being the tensor then the op would be GGML_OP_ADD.
printf("x tensor operation: %s, %s\n", ggml_op_name(x->op), ggml_op_symbol(x->op));
// ggml_tensor's are used as the base unit values in the library, similar to
// the Value struct in the LLM zero-to-hero tutorial. These values support
// automatic differentiation, so they have a grad field.
printf("x tensor grad: %p\n", x->grad);
// the Value struct in the LLM zero-to-hero tutorial.
// src are the values that were used to create the tensor, for example if the
// tensor was created by a + b = c, then the src would be a and b.
printf("x tensor src: %p\n", x->src);
Expand Down

0 comments on commit 0478f68

Please sign in to comment.