We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ef5b1a6 · commit c759027 · Copy full SHA for c759027
csrc/sm100/kernel/fmha_kernel_bwd_sum_OdO.hpp
@@ -140,7 +140,7 @@ struct FmhaKernelBwdSumOdO {
140
*reinterpret_cast<Vec*>(value_dO) = *reinterpret_cast<const Vec*>(&ptr_dO_bhq[idx_d]);
141
142
for (int v = 0; v < kElementsPerLoad; v++) {
143
- acc += value_O[v] * value_dO[v];
+ acc += ElementAcc(value_O[v]) * ElementAcc(value_dO[v]);
144
}
145
146
0 commit comments