Skip to content

Commit 78cf598

Browse files
committed
AArch64: Add native implementation for poly_caddq
Signed-off-by: Matthias J. Kannwischer <[email protected]>
1 parent ffdecdb commit 78cf598

File tree

3 files changed

+68
-0
lines changed

3 files changed

+68
-0
lines changed

mldsa/native/aarch64/meta.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define MLD_USE_NATIVE_REJ_UNIFORM_ETA4
1616
#define MLD_USE_NATIVE_POLY_DECOMPOSE_32
1717
#define MLD_USE_NATIVE_POLY_DECOMPOSE_88
18+
#define MLD_USE_NATIVE_POLY_CADDQ
1819

1920
/* Identifier for this backend so that source and assembly files
2021
* in the build can be appropriately guarded. */
@@ -107,6 +108,11 @@ static MLD_INLINE void mld_poly_decompose_88_native(int32_t *a1, int32_t *a0,
107108
mld_poly_decompose_88_asm(a1, a0, a);
108109
}
109110

111+
/* AArch64 backend entry point for poly_caddq: forwards the coefficient
 * array `a` unchanged to the assembly routine mld_poly_caddq_asm, which
 * updates the coefficients in place. Returns nothing. */
static MLD_INLINE void mld_poly_caddq_native(int32_t a[MLDSA_N])
112+
{
113+
mld_poly_caddq_asm(a);
114+
}
115+
110116
#endif /* !__ASSEMBLER__ */
111117

112118
#endif /* !MLD_NATIVE_AARCH64_META_H */

mldsa/native/aarch64/src/arith_native_aarch64.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,7 @@ void mld_poly_decompose_32_asm(int32_t *a1, int32_t *a0, const int32_t *a);
6868
#define mld_poly_decompose_88_asm MLD_NAMESPACE(poly_decompose_88_asm)
6969
void mld_poly_decompose_88_asm(int32_t *a1, int32_t *a0, const int32_t *a);
7070

71+
/* Namespaced symbol name for the AArch64 assembly routine poly_caddq_asm. */
#define mld_poly_caddq_asm MLD_NAMESPACE(poly_caddq_asm)
72+
/* In place, adds MLDSA_Q (8380417) to every negative coefficient of `a` and
 * leaves non-negative coefficients unchanged. The assembly implementation
 * reads and writes 256 consecutive int32_t values starting at `a`. */
void mld_poly_caddq_asm(int32_t *a);
73+
7174
#endif /* !MLD_NATIVE_AARCH64_SRC_ARITH_NATIVE_AARCH64_H */
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright (c) The mldsa-native project authors
3+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4+
*/
5+
#include "../../../common.h"
6+
7+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
8+
9+
// caddq: conditionally add Q to each of the four 32-bit lanes of the vector
// register passed as the macro argument. Uses the register aliases tmp
// (scratch) and q_reg (Q in every lane), which are bound with .req inside
// the function that follows, so the macro can only be expanded there.
.macro caddq inout
10+
// tmp = logical shift right by 31: 1 for a negative lane, 0 otherwise
ushr tmp.4s, \inout\().4s, #31
11+
// lane += tmp * Q, i.e. Q is added only to the negative lanes
mla \inout\().4s, tmp.4s, q_reg.4s
12+
.endm
13+
14+
// poly_caddq_asm: in-place conditional addition of Q to a polynomial.
// For each of the 256 int32_t coefficients starting at x0, adds
// Q = 8380417 if the coefficient is negative and leaves it unchanged
// otherwise.
// Registers written: x0 (advanced past the processed data), x1, w9,
// v0-v5, and the condition flags (via subs).
.global MLD_ASM_NAMESPACE(poly_caddq_asm)
15+
.balign 16
16+
MLD_ASM_FN_SYMBOL(poly_caddq_asm)
17+
// C prototype: void mld_poly_caddq_asm(int32_t *a)
18+
// x0: pointer to the coefficients; read and written in place
19+
20+
// Register aliases (released again with .unreq at the end of the file)
21+
a_ptr .req x0
22+
count .req x1
23+
q_reg .req v4
24+
tmp .req v5
25+
26+
// Build the constant Q in w9 in two 16-bit halves
27+
// MLDSA_Q = 8380417 = 0x7FE001
28+
movz w9, #0xE001
29+
movk w9, #0x7F, lsl #16
30+
dup q_reg.4s, w9 // Broadcast Q into all four 32-bit lanes
31+
32+
// 64/4 = 16 iterations; each iteration handles 4 vectors x 4 lanes = 16
// coefficients (64 bytes), so the loop covers 256 coefficients in total.
mov count, #64/4
33+
poly_caddq_loop:
34+
// Load 16 consecutive coefficients
ldr q0, [a_ptr, #0*16]
35+
ldr q1, [a_ptr, #1*16]
36+
ldr q2, [a_ptr, #2*16]
37+
ldr q3, [a_ptr, #3*16]
38+
39+
// Add Q to the negative lanes of each vector
caddq v0
40+
caddq v1
41+
caddq v2
42+
caddq v3
43+
44+
// Write the results back. q0 is stored last because its store is
// post-indexed: it advances a_ptr by 64 bytes, and the other three stores
// still need the old base address.
str q1, [a_ptr, #1*16]
45+
str q2, [a_ptr, #2*16]
46+
str q3, [a_ptr, #3*16]
47+
str q0, [a_ptr], #4*16
48+
49+
// Decrement the iteration counter and loop until it reaches zero
subs count, count, #1
50+
bne poly_caddq_loop
51+
52+
ret
53+
54+
// Release the register aliases so the names do not leak past this routine
.unreq a_ptr
55+
.unreq count
56+
.unreq q_reg
57+
.unreq tmp
58+
59+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */

0 commit comments

Comments
 (0)