From 47e6f525682478ab3ff4fcb2287dbb57b38b7fd8 Mon Sep 17 00:00:00 2001
From: Satya Jandhyala
Date: Fri, 22 Nov 2024 23:45:48 -0800
Subject: [PATCH] Bug fix

---
 onnxruntime/contrib_ops/webgpu/bert/attention.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/contrib_ops/webgpu/bert/attention.cc b/onnxruntime/contrib_ops/webgpu/bert/attention.cc
index ea8aa95614b40..089cde1669385 100644
--- a/onnxruntime/contrib_ops/webgpu/bert/attention.cc
+++ b/onnxruntime/contrib_ops/webgpu/bert/attention.cc
@@ -462,7 +462,7 @@ Status ComputeVxAttentionScore(onnxruntime::webgpu::ComputeContext& context, int
                             {static_cast<uint32_t>(parameters.v_head_size_)},
                             {static_cast<uint32_t>(parameters.num_heads_)},
                             {static_cast<uint32_t>(parameters.head_size_)},
-                            {static_cast<uint32_t>(parameters.v_hidden_size_)},
+                            {static_cast<uint32_t>(parameters.v_hidden_size_ * parameters.n_reps)},
                             {static_cast<uint32_t>(past_sequence_length)},
                             {static_cast<uint32_t>(parameters.kv_sequence_length_)},
                             {static_cast<uint32_t>(seqlen_k == nullptr ? total_sequence_length : parameters.seqlen_present_kv_cache_)},