From 44ac8fd1b46d40ce2495919626dd84a26c1efa4e Mon Sep 17 00:00:00 2001 From: Neil Kichler Date: Sat, 1 Nov 2025 23:15:47 +0100 Subject: [PATCH] Fix register index bug in mma.sync.aligned.m16n8k16 --- include/cutlass/arch/mma_sm90.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/cutlass/arch/mma_sm90.h b/include/cutlass/arch/mma_sm90.h index b135c8645b..10481e8c29 100644 --- a/include/cutlass/arch/mma_sm90.h +++ b/include/cutlass/arch/mma_sm90.h @@ -222,7 +222,7 @@ struct Mma< asm volatile("mma.sync.aligned.m16n8k16.row.col.f64.f64.f64.f64 {%0, %1, %2, %3}, {%4, %5, %6, %7, %8, %9, %10, %11}, {%12, %13, %14, %15}, {%16, %17, %18, %19};\n" : "=d"(D[0]), "=d"(D[1]), "=d"(D[2]), "=d"(D[3]) - : "d"(A[0]), "d"(A[2]), "d"(A[2]), "d"(A[3]), "d"(A[4]), "d"(A[5]), "d"(A[6]), "d"(A[7]), + : "d"(A[0]), "d"(A[1]), "d"(A[2]), "d"(A[3]), "d"(A[4]), "d"(A[5]), "d"(A[6]), "d"(A[7]), "d"(B[0]), "d"(B[1]), "d"(B[2]), "d"(B[3]), "d"(C[0]), "d"(C[1]), "d"(C[2]), "d"(C[3]));