Skip to content

Commit c759027

Browse files
author
Jiashi Li
committed
Fix accuracy issue in sum_OdO kernel
1 parent ef5b1a6 commit c759027

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

csrc/sm100/kernel/fmha_kernel_bwd_sum_OdO.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ struct FmhaKernelBwdSumOdO {
140 140       *reinterpret_cast<Vec*>(value_dO) = *reinterpret_cast<const Vec*>(&ptr_dO_bhq[idx_d]);
141 141
142 142       for (int v = 0; v < kElementsPerLoad; v++) {
143     -       acc += value_O[v] * value_dO[v];
    143 +       acc += ElementAcc(value_O[v]) * ElementAcc(value_dO[v]);
144 144       }
145 145     }
146 146

0 commit comments

Comments
 (0)