Skip to content

Commit a65b1d9

Browse files
authored
Merge pull request #594 from pq-code-package/aarch64-intt-twiddle-alignment
AArch64 iNTT: Eliminate unaligned twiddle loads
2 parents ee7e1bf + 5e71082 commit a65b1d9

File tree

5 files changed

+64
-53
lines changed

5 files changed

+64
-53
lines changed

dev/aarch64_clean/src/aarch64_zetas.c

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -195,28 +195,33 @@ MLD_ALIGN const int32_t mld_aarch64_intt_zetas_layer78[] = {
195195
};
196196

197197
MLD_ALIGN const int32_t mld_aarch64_intt_zetas_layer123456[] = {
198-
-2283733, -585207070, -1858416, -476219497, -3345963, -857403734,
199-
-2815639, -721508096, -1853806, -475038184, -2917338, -747568486,
200-
3585098, 918682129, -3870317, -991769559, -556856, -142694469,
201-
642628, 164673562, -3192354, -818041395, 2897314, 742437332,
202-
-1460718, -374309300, 3950053, 1012201926, 1716988, 439978542,
203-
-2453983, -628833668, 1935799, 496048908, -3756790, -962678241,
204-
-1714295, -439288460, 3574466, 915957677, 817536, 209493775,
205-
3227876, 827143915, -1759347, -450833045, -3415069, -875112161,
206-
1335936, 342333886, -2156050, -552488273, -3241972, -830756018,
207-
-676590, -173376332, 4018989, 1029866791, -2071829, -530906624,
208-
434125, 111244624, 3506380, 898510625, -1095468, -280713909,
209-
3524442, 903139016, -928749, -237992130, -394148, -101000509,
210-
1674615, 429120452, -1159875, -297218217, -3704823, -949361686,
211-
-2663378, -682491182, -2101410, -538486762, 3110818, 797147778,
212-
4063053, 1041158200, 3586446, 919027554, -2740543, -702264730,
213-
3370349, 863652652, -3182878, -815613168, -3602218, -923069133,
214-
-294725, -75523344, -3761513, -963888510, -3765607, -964937599,
215-
3201430, 820367122, 3145678, 806080660, 2883726, 738955404,
216-
3201494, 820383522, 1221177, 312926867, -557458, -142848732,
217-
1005239, 257592709, -3764867, -964747974, -2129892, -545785280,
218-
-2682288, -687336873, -3542485, -907762539, 601683, 154181397,
219-
0, 0,
198+
-2283733, -585207070, 0, 0, -1858416, -476219497,
199+
-3345963, -857403734, -2815639, -721508096, 0, 0,
200+
-1853806, -475038184, -2917338, -747568486, 3585098, 918682129,
201+
0, 0, -3870317, -991769559, -556856, -142694469,
202+
642628, 164673562, 0, 0, -3192354, -818041395,
203+
2897314, 742437332, -1460718, -374309300, 0, 0,
204+
3950053, 1012201926, 1716988, 439978542, -2453983, -628833668,
205+
0, 0, 1935799, 496048908, -3756790, -962678241,
206+
-1714295, -439288460, 0, 0, 3574466, 915957677,
207+
817536, 209493775, 3227876, 827143915, 0, 0,
208+
-1759347, -450833045, -3415069, -875112161, 1335936, 342333886,
209+
0, 0, -2156050, -552488273, -3241972, -830756018,
210+
-676590, -173376332, 0, 0, 4018989, 1029866791,
211+
-2071829, -530906624, 434125, 111244624, 0, 0,
212+
3506380, 898510625, -1095468, -280713909, 3524442, 903139016,
213+
0, 0, -928749, -237992130, -394148, -101000509,
214+
1674615, 429120452, 0, 0, -1159875, -297218217,
215+
-3704823, -949361686, -2663378, -682491182, 0, 0,
216+
-2101410, -538486762, 3110818, 797147778, 4063053, 1041158200,
217+
0, 0, 3586446, 919027554, -2740543, -702264730,
218+
3370349, 863652652, 0, 0, -3182878, -815613168,
219+
-3602218, -923069133, -294725, -75523344, -3761513, -963888510,
220+
-3765607, -964937599, 3201430, 820367122, 3145678, 806080660,
221+
2883726, 738955404, 3201494, 820383522, 1221177, 312926867,
222+
-557458, -142848732, 1005239, 257592709, -3764867, -964747974,
223+
-2129892, -545785280, -2682288, -687336873, -3542485, -907762539,
224+
601683, 154181397, 0, 0,
220225
};
221226

222227
#else /* MLD_ARITH_BACKEND_AARCH64 */

dev/aarch64_clean/src/intt.S

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
.endm
7575

7676
.macro load_next_roots_56
77-
ldr q_root1, [r123456_ptr], #24
77+
ldr d_root1, [r123456_ptr], #32
7878
ldr q_root0, [r123456_ptr, #-16]
7979
.endm
8080

@@ -194,6 +194,7 @@
194194
q_root2_tw .req q6
195195
q_root3_tw .req q7
196196

197+
d_root1 .req d1
197198

198199
tmp .req v24
199200
t0 .req v25
@@ -424,6 +425,7 @@ intt_layer1234_start:
424425
.unreq q_root1_tw
425426
.unreq q_root2_tw
426427
.unreq q_root3_tw
428+
.unreq d_root1
427429
.unreq tmp
428430
.unreq t0
429431
.unreq t1

mldsa/native/aarch64/src/aarch64_zetas.c

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -195,28 +195,33 @@ MLD_ALIGN const int32_t mld_aarch64_intt_zetas_layer78[] = {
195195
};
196196

197197
MLD_ALIGN const int32_t mld_aarch64_intt_zetas_layer123456[] = {
198-
-2283733, -585207070, -1858416, -476219497, -3345963, -857403734,
199-
-2815639, -721508096, -1853806, -475038184, -2917338, -747568486,
200-
3585098, 918682129, -3870317, -991769559, -556856, -142694469,
201-
642628, 164673562, -3192354, -818041395, 2897314, 742437332,
202-
-1460718, -374309300, 3950053, 1012201926, 1716988, 439978542,
203-
-2453983, -628833668, 1935799, 496048908, -3756790, -962678241,
204-
-1714295, -439288460, 3574466, 915957677, 817536, 209493775,
205-
3227876, 827143915, -1759347, -450833045, -3415069, -875112161,
206-
1335936, 342333886, -2156050, -552488273, -3241972, -830756018,
207-
-676590, -173376332, 4018989, 1029866791, -2071829, -530906624,
208-
434125, 111244624, 3506380, 898510625, -1095468, -280713909,
209-
3524442, 903139016, -928749, -237992130, -394148, -101000509,
210-
1674615, 429120452, -1159875, -297218217, -3704823, -949361686,
211-
-2663378, -682491182, -2101410, -538486762, 3110818, 797147778,
212-
4063053, 1041158200, 3586446, 919027554, -2740543, -702264730,
213-
3370349, 863652652, -3182878, -815613168, -3602218, -923069133,
214-
-294725, -75523344, -3761513, -963888510, -3765607, -964937599,
215-
3201430, 820367122, 3145678, 806080660, 2883726, 738955404,
216-
3201494, 820383522, 1221177, 312926867, -557458, -142848732,
217-
1005239, 257592709, -3764867, -964747974, -2129892, -545785280,
218-
-2682288, -687336873, -3542485, -907762539, 601683, 154181397,
219-
0, 0,
198+
-2283733, -585207070, 0, 0, -1858416, -476219497,
199+
-3345963, -857403734, -2815639, -721508096, 0, 0,
200+
-1853806, -475038184, -2917338, -747568486, 3585098, 918682129,
201+
0, 0, -3870317, -991769559, -556856, -142694469,
202+
642628, 164673562, 0, 0, -3192354, -818041395,
203+
2897314, 742437332, -1460718, -374309300, 0, 0,
204+
3950053, 1012201926, 1716988, 439978542, -2453983, -628833668,
205+
0, 0, 1935799, 496048908, -3756790, -962678241,
206+
-1714295, -439288460, 0, 0, 3574466, 915957677,
207+
817536, 209493775, 3227876, 827143915, 0, 0,
208+
-1759347, -450833045, -3415069, -875112161, 1335936, 342333886,
209+
0, 0, -2156050, -552488273, -3241972, -830756018,
210+
-676590, -173376332, 0, 0, 4018989, 1029866791,
211+
-2071829, -530906624, 434125, 111244624, 0, 0,
212+
3506380, 898510625, -1095468, -280713909, 3524442, 903139016,
213+
0, 0, -928749, -237992130, -394148, -101000509,
214+
1674615, 429120452, 0, 0, -1159875, -297218217,
215+
-3704823, -949361686, -2663378, -682491182, 0, 0,
216+
-2101410, -538486762, 3110818, 797147778, 4063053, 1041158200,
217+
0, 0, 3586446, 919027554, -2740543, -702264730,
218+
3370349, 863652652, 0, 0, -3182878, -815613168,
219+
-3602218, -923069133, -294725, -75523344, -3761513, -963888510,
220+
-3765607, -964937599, 3201430, 820367122, 3145678, 806080660,
221+
2883726, 738955404, 3201494, 820383522, 1221177, 312926867,
222+
-557458, -142848732, 1005239, 257592709, -3764867, -964747974,
223+
-2129892, -545785280, -2682288, -687336873, -3542485, -907762539,
224+
601683, 154181397, 0, 0,
220225
};
221226

222227
#else /* MLD_ARITH_BACKEND_AARCH64 */

mldsa/native/aarch64/src/intt.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ Lintt_layer5678_start:
110110
trn2 v11.2d, v26.2d, v28.2d
111111
trn1 v8.2d, v25.2d, v27.2d
112112
trn1 v9.2d, v26.2d, v28.2d
113-
ldr q1, [x2], #0x18
113+
ldr d1, [x2], #0x20
114114
ldur q0, [x2, #-0x10]
115115
sub v24.4s, v8.4s, v9.4s
116116
add v8.4s, v8.4s, v9.4s

scripts/autogen

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,7 @@ def gen_aarch64_intt_zetas_layer78():
682682
def gen_aarch64_intt_zetas_layer123456():
683683
for i in range(16):
684684
yield from gen_aarch64_root_of_unity_for_block(4, i, inv=True)
685+
yield from (0, 0) # Padding
685686
yield from gen_aarch64_root_of_unity_for_block(5, i * 2, inv=True)
686687
yield from gen_aarch64_root_of_unity_for_block(5, i * 2 + 1, inv=True)
687688

@@ -743,7 +744,7 @@ def gen_aarch64_zeta_file(dry_run=False):
743744
yield ""
744745

745746
update_file(
746-
"mldsa/native/aarch64/src/aarch64_zetas.c",
747+
"dev/aarch64_clean/src/aarch64_zetas.c",
747748
"\n".join(gen()),
748749
dry_run=dry_run,
749750
)
@@ -797,7 +798,7 @@ def gen_aarch64_rej_uniform_eta_table(dry_run=False):
797798
yield ""
798799

799800
update_file(
800-
"mldsa/native/aarch64/src/rej_uniform_eta_table.c",
801+
"dev/aarch64_clean/src/rej_uniform_eta_table.c",
801802
"\n".join(gen()),
802803
dry_run=dry_run,
803804
)
@@ -850,7 +851,7 @@ def gen_aarch64_rej_uniform_table(dry_run=False):
850851
yield ""
851852

852853
update_file(
853-
"mldsa/native/aarch64/src/rej_uniform_table.c",
854+
"dev/aarch64_clean/src/rej_uniform_table.c",
854855
"\n".join(gen()),
855856
dry_run=dry_run,
856857
)
@@ -901,7 +902,7 @@ def gen_avx2_rej_uniform_table(dry_run=False):
901902
yield ""
902903

903904
update_file(
904-
"mldsa/native/x86_64/src/rej_uniform_table.c",
905+
"dev/x86_64/src/rej_uniform_table.c",
905906
"\n".join(gen()),
906907
dry_run=dry_run,
907908
)
@@ -1003,9 +1004,7 @@ def gen_avx2_zeta_file(dry_run=False):
10031004
yield from map(lambda t: str(t) + ",", gen_avx2_fwd_ntt_zetas(mult=False))
10041005
yield ""
10051006

1006-
update_file(
1007-
"mldsa/native/x86_64/src/x86_64_zetas.i", "\n".join(gen()), dry_run=dry_run
1008-
)
1007+
update_file("dev/x86_64/src/x86_64_zetas.i", "\n".join(gen()), dry_run=dry_run)
10091008

10101009

10111010
def get_oqs_shared_sources(backend):

0 commit comments

Comments
 (0)