Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SYCL: Migrate away from deprecated ggml_tensor->backend #10840

Merged
merged 11 commits into from
Dec 20, 2024
Merged
12 changes: 7 additions & 5 deletions ggml/src/ggml-sycl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
//

#include "common.hpp"

#include "ggml-backend-impl.h"
#include "ggml-impl.h"

int get_current_device_id() {
Expand Down Expand Up @@ -65,9 +67,9 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
const ggml_sycl_op_flatten_t op) try {

const bool use_src1 = src1 != nullptr;

GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
if(use_src1)
GGML_ASSERT(strcmp(src1->buffer->buft->iface.get_name(src1->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);

// dd = data device
float * src0_ddf = (float *) src0->data;
Expand All @@ -80,8 +82,8 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr

ggml_sycl_set_device(ctx.device);
queue_ptr main_stream = ctx.stream();
// GGML_SYCL_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
// ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device);
// GGML_LOG_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
// ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device);

// do the computation
op(ctx, src0, src1, dst, src0_ddf, src1_ddf, dst_ddf, main_stream);
Expand Down
15 changes: 6 additions & 9 deletions ggml/src/ggml-sycl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,16 @@

#define GGML_COMMON_DECL_SYCL
#define GGML_COMMON_IMPL_SYCL
/* suppress warning spam */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnested-anon-types"
#include "ggml-common.h"
#pragma clang diagnostic pop
#include "ggml-impl.h"

void* ggml_sycl_host_malloc(size_t size);
void ggml_sycl_host_free(void* ptr);

// Switch read by GGML_SYCL_DEBUG below; initialised to 0, so the macro prints nothing by default.
// NOTE(review): declared `static` in a header, so presumably every translation unit
// gets its own copy — confirm where (and whether) it is set.
static int g_ggml_sycl_debug = 0;
// Forwards its printf-style arguments to stderr only when g_ggml_sycl_debug is non-zero.
// The do { ... } while (0) wrapper makes the expansion a single statement.
#define GGML_SYCL_DEBUG(...) \
do { \
if (g_ggml_sycl_debug) \
fprintf(stderr, __VA_ARGS__); \
} while (0)

#define CHECK_TRY_ERROR(expr) \
[&]() { \
try { \
Expand Down Expand Up @@ -163,8 +161,7 @@ inline dpct::err0 ggml_sycl_set_device(const int device) try {
int current_device_id;
SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));

// GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d,
// current_device_id=%d\n", device, current_device);
GGML_LOG_DEBUG("ggml_sycl_set_device device_id=%d,current_device_id=%d\n", device, current_device_id);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no need to restore this code.
This log would be printed many times during inference,
so I suggest keeping it commented out by default.

if (device == current_device_id) {
return 0;
}
Expand Down
97 changes: 49 additions & 48 deletions ggml/src/ggml-sycl/element_wise.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "common.hpp"
#include "element_wise.hpp"
#include "ggml-impl.h"

void acc_f32(const float * x, const float * y, float * dst, const int ne,
const int ne10, const int ne11, const int ne12,
Expand Down Expand Up @@ -883,148 +884,148 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor


// Wrapper for the sqrt op: logs entry, runs ggml_sycl_op_sqrt through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the sin op: logs entry, runs ggml_sycl_op_sin through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the cos op: logs entry, runs ggml_sycl_op_cos through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the acc op: logs entry, runs ggml_sycl_op_acc through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the gelu op: logs entry, runs ggml_sycl_op_gelu through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the silu op: logs entry, runs ggml_sycl_op_silu through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the gelu_quick op: logs entry, runs ggml_sycl_op_gelu_quick through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the tanh op: logs entry, runs ggml_sycl_op_tanh through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the relu op: logs entry, runs ggml_sycl_op_relu through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the sigmoid op: logs entry, runs ggml_sycl_op_sigmoid through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the hardsigmoid op: logs entry, runs ggml_sycl_op_hardsigmoid through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the hardswish op: logs entry, runs ggml_sycl_op_hardswish through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}


// Wrapper for the exp op: logs entry, runs ggml_sycl_op_exp through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the log op: logs entry, runs ggml_sycl_op_log through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the neg op: logs entry, runs ggml_sycl_op_neg through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the step op: logs entry, runs ggml_sycl_op_step through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the leaky_relu op: logs entry, runs ggml_sycl_op_leaky_relu through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the sqr op: logs entry, runs ggml_sycl_op_sqr through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the upscale op: logs entry, runs ggml_sycl_op_upscale through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the pad op: logs entry, runs ggml_sycl_op_pad through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}



// Wrapper for the add op: logs entry, runs ggml_sycl_op_add through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the sub op: logs entry, runs ggml_sycl_op_sub through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the mul op: logs entry, runs ggml_sycl_op_mul through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}

// Wrapper for the div op: logs entry, runs ggml_sycl_op_div through
// ggml_sycl_op_flatten(ctx, src0, src1, dst, ...), then logs completion.
// NOTE(review): each GGML_SYCL_DEBUG / GGML_LOG_DEBUG pair looks like the old and new
// side of one diff line shown together — confirm only one of each pair is live.
void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
GGML_LOG_DEBUG("call %s\n", __func__);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div);
GGML_SYCL_DEBUG("call %s done\n", __func__);
GGML_LOG_DEBUG("call %s done\n", __func__);
}
Loading
Loading