diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp index 60be5369bf186..2d79b8b611de3 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp @@ -564,21 +564,21 @@ static void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, c #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { - const block_q4_0x4 * b_ptr = (const block_q4_0x4 *)vx; + const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx; for (int c = 0; c < nc; c += ncols_interleaved) { - const block_q8_0 * a_ptr = (const block_q8_0 *)vy; + const block_q8_0 * a_ptr = (const block_q8_0 *) vy; float32x4_t acc = vdupq_n_f32(0); for (int b = 0; b < nb; b++) { - int8x16_t b0 = vld1q_s8((const int8_t *)b_ptr->qs); - int8x16_t b1 = vld1q_s8((const int8_t *)b_ptr->qs + 16); - int8x16_t b2 = vld1q_s8((const int8_t *)b_ptr->qs + 32); - int8x16_t b3 = vld1q_s8((const int8_t *)b_ptr->qs + 48); - float16x4_t bd = vld1_f16((const __fp16 *)b_ptr->d); + int8x16_t b0 = vld1q_s8((const int8_t *) b_ptr->qs); + int8x16_t b1 = vld1q_s8((const int8_t *) b_ptr->qs + 16); + int8x16_t b2 = vld1q_s8((const int8_t *) b_ptr->qs + 32); + int8x16_t b3 = vld1q_s8((const int8_t *) b_ptr->qs + 48); + float16x4_t bd = vld1_f16((const __fp16 *) b_ptr->d); int8x16_t a0 = vld1q_s8(a_ptr->qs); int8x16_t a1 = vld1q_s8(a_ptr->qs + qk/2); - float16x4_t ad = vld1_dup_f16((const __fp16 *)&a_ptr->d); + float16x4_t ad = vld1_dup_f16((const __fp16 *) &a_ptr->d); int32x4_t ret = vdupq_n_s32(0); @@ -649,23 +649,23 @@ static void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, c #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { - const block_q4_0x4 * b_ptr = (const block_q4_0x4 *)vx; + const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx; for (int c = 0; c < nc; c += ncols_interleaved) { - const block_q8_0 * a_ptr = (const block_q8_0 *)vy; + const block_q8_0 * a_ptr = (const block_q8_0 *) vy; float32x4_t acc = vdupq_n_f32(0); for (int b = 0; b < nb; b++) { - int8x16_t b0 = vld1q_s8((const int8_t *)b_ptr->qs); - int8x16_t b1 = vld1q_s8((const int8_t *)b_ptr->qs + 16); - int8x16_t b2 = vld1q_s8((const int8_t *)b_ptr->qs + 32); - int8x16_t b3 = vld1q_s8((const int8_t *)b_ptr->qs + 48); - float16x4_t bd = vld1_f16((const __fp16 *)b_ptr->d); - - int8x16_t a0 = (int8x16_t)vld1q_dup_s64((const int64_t *)a_ptr->qs); - int8x16_t a1 = (int8x16_t)vld1q_dup_s64((const int64_t *)a_ptr->qs + 1); - int8x16_t a2 = (int8x16_t)vld1q_dup_s64((const int64_t *)a_ptr->qs + 2); - int8x16_t a3 = (int8x16_t)vld1q_dup_s64((const int64_t *)a_ptr->qs + 3); - float16x4_t ad = vld1_dup_f16((const __fp16 *)&a_ptr->d); + int8x16_t b0 = vld1q_s8((const int8_t *) b_ptr->qs); + int8x16_t b1 = vld1q_s8((const int8_t *) b_ptr->qs + 16); + int8x16_t b2 = vld1q_s8((const int8_t *) b_ptr->qs + 32); + int8x16_t b3 = vld1q_s8((const int8_t *) b_ptr->qs + 48); + float16x4_t bd = vld1_f16((const __fp16 *) b_ptr->d); + + int8x16_t a0 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs); + int8x16_t a1 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 1); + int8x16_t a2 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 2); + int8x16_t a3 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 3); + float16x4_t ad = vld1_dup_f16((const __fp16 *) &a_ptr->d); int32x4_t ret0 = vdupq_n_s32(0); int32x4_t ret1 = vdupq_n_s32(0);