Skip to content

Commit

Permalink
ggml: add initial RoPE example
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Bevenius <[email protected]>
  • Loading branch information
danbev committed Aug 2, 2024
1 parent c4b82f2 commit df52f92
Showing 1 changed file with 132 additions and 0 deletions.
132 changes: 132 additions & 0 deletions fundamentals/ggml/src/rope.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#include <stdio.h>
#include <math.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

double calculate_n_rot(double x, double base, int max_pos_emb, int n_dims) {
const double pi = M_PI;
// Calculate the exponent
double exponent = (2.0 * max_pos_emb) / n_dims;
// Calculate base raised to the power of the exponent
double base_to_power = pow(base, exponent);
// Calculate the final result
double n_rot = 2 * pi * x * base_to_power;
return n_rot;
}

int main(int argc, char **argv) {
printf("GGML RoPE example\n");

struct ggml_init_params params = {
.mem_size = 20000000,
.mem_buffer = NULL,
};
struct ggml_context* ctx = ggml_init(params);

// Simulate a sequence of 6 tokens with en embedding size of 4019 and a
// context length of 512. Keep in mind that this tensor is created to be
// used to calculate the coputation graph.
int n_ctx_orig = 4096;
int embd_dim = 128;
int n_head = 32;
int n_tokens = 6;

// The Query matrix in this case can hold 512 tokens each with a dimension
// of 4096.
struct ggml_tensor* query = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ctx_orig, n_tokens);

// We reshape the query matrix embedding dimensions to account for the number
// of heads (32) each which will have a dimension of 128 (128 * 32 = 4096).
struct ggml_tensor* a = ggml_reshape_3d(ctx, query, embd_dim, n_head, n_tokens);
ggml_set_name(a, "a");

// These are the positions
struct ggml_tensor* pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens);
ggml_set_name(pos, "pos");

// Set some made up values for the tensor to be rotated.
// First loop over the number of tokens in the batch (6) (skipping the actual
// loop for the batch here though.
for (int i = 0; i < a->ne[2]; i++) {
// Loop over the embedding heads (32)
for (int j = 0; j < a->ne[1]; j++) {
// Loop over the embedding dimensions (128)
for (int k = 0; k < a->ne[0]; k++) {
// TODO: make the value a random value.
//float value = 1.0f + k;
float value = 8.3f + k;
ggml_set_f32_nd(a, k, j, i, 0, value);
}
}
}

// Print a few of the first dimensions so we can see that there is a rotation
// being performed. In this case we are printing the first 10 embeddings for
// the 4th token.
for (int i = 0; i < 10; i++) {
printf("embedding for token 4, embedding dim %d: %f\n", i, ggml_get_f32_nd(a, i, 0, 4, 0));
}

// Set the positions manually (the b tensor parameter to ggml_rope_ext).
for (int i = 0; i < pos->ne[0]; i++) {
ggml_set_i32_1d(pos, i, i);
}

int mode = 0; // rote type 0 = Normal
// The RoPE base frequency
// ↓
// (10000^(-2j/d).
float freq_base = 10000.0f;
// The RoPE frequency scale.
float freq_scale = 1.0f;
// TODO: What is this? It looks like this is mscale (magnituce scale)
float attn_factor = 1.0f;
// Extrapolation factor. If this is 0.0 then the beta_fast and beta_slow
// are not used.
float ext_factor = 1.0f;
// This is a YaRN parameter which I think is named α in the YaRN paper.
float beta_fast = 32.0f;
// This is a YaRN parameter which I think is named β in the YaRN paper.
float beta_slow = 1.0f;
// RoPE Frequency factors are used with certan models like PHI.
struct ggml_tensor* freq_factors = NULL;

struct ggml_tensor* s = ggml_rope_ext(ctx,
a,
pos,
freq_factors,
embd_dim,
mode,
n_ctx_orig,
freq_base,
freq_scale,
ext_factor,
attn_factor,
beta_fast,
beta_slow);

struct ggml_cgraph* c_graph = ggml_new_graph(ctx);
ggml_build_forward_expand(c_graph, s);


int n_threads = 4;
enum ggml_status status = ggml_graph_compute_with_ctx(ctx, c_graph, n_threads);
if (status != GGML_STATUS_SUCCESS) {
printf("Error: %s\n", ggml_status_to_string(status));
return 1;
}

struct ggml_tensor* r = ggml_reshape_2d(ctx, s, n_ctx_orig, n_tokens);

printf("embedding after rotation:\n");
//printf("Rotation: %f\n", *(float *)((char *) s->data + 73728));

for (int i = 0; i < 10; i++) {
printf("embedding for token 4, embedding dim %d = %f\n", i, ggml_get_f32_nd(s, i, 0, 4, 0));
}

ggml_free(ctx);
return 0;
}

0 comments on commit df52f92

Please sign in to comment.