diff --git a/onnxruntime/contrib_ops/webgpu/bert/attention.cc b/onnxruntime/contrib_ops/webgpu/bert/attention.cc index ea8aa95614b40..089cde1669385 100644 --- a/onnxruntime/contrib_ops/webgpu/bert/attention.cc +++ b/onnxruntime/contrib_ops/webgpu/bert/attention.cc @@ -462,7 +462,7 @@ Status ComputeVxAttentionScore(onnxruntime::webgpu::ComputeContext& context, int {static_cast(parameters.v_head_size_)}, {static_cast(parameters.num_heads_)}, {static_cast(parameters.head_size_)}, - {static_cast(parameters.v_hidden_size_)}, + {static_cast(parameters.v_hidden_size_ * parameters.n_reps)}, {static_cast(past_sequence_length)}, {static_cast(parameters.kv_sequence_length_)}, {static_cast(seqlen_k == nullptr ? total_sequence_length : parameters.seqlen_present_kv_cache_)},