From 44a5e1a39826fe9eeeefde7611b8f10d46158b71 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Fri, 5 Jul 2024 16:17:42 +1000 Subject: [PATCH] Dilithium: add implementation of signing that allocates less memory Added implementation of signing that allocates less memory by doing the matrix/vector loops in the sign code - WOLFSSL_DILITHIUM_SIGN_SMALL_MEM. Split out vector operations into vector and polynomial operations so that small mem signing can call them. Fix benchmark to be able to compile with only Dilithium and no other asymmetric algorithms. --- wolfcrypt/benchmark/benchmark.c | 8 +- wolfcrypt/src/dilithium.c | 1480 +++++++++++++++++++++++-------- 2 files changed, 1094 insertions(+), 394 deletions(-) diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index 964f9ebd0e..225b236586 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -1226,7 +1226,7 @@ static const char* bench_result_words1[][4] = { defined(HAVE_CURVE25519) || defined(HAVE_CURVE25519_SHARED_SECRET) || \ defined(HAVE_ED25519) || defined(HAVE_CURVE448) || \ defined(HAVE_CURVE448_SHARED_SECRET) || defined(HAVE_ED448) || \ - defined(WOLFSSL_HAVE_KYBER) + defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_DILITHIUM) static const char* bench_desc_words[][15] = { /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 */ @@ -1692,7 +1692,7 @@ static const char* bench_result_words3[][5] = { defined(HAVE_CURVE448) || defined(HAVE_ED448) || \ defined(HAVE_ECC) || !defined(NO_DH) || \ !defined(NO_RSA) || defined(HAVE_SCRYPT) || \ - defined(WOLFSSL_HAVE_KYBER) + defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_DILITHIUM) #define BENCH_ASYM #endif @@ -1700,7 +1700,7 @@ static const char* bench_result_words3[][5] = { #if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DH) || \ defined(HAVE_CURVE25519) || defined(HAVE_ED25519) || \ defined(HAVE_CURVE448) || defined(HAVE_ED448) || \ - defined(WOLFSSL_HAVE_KYBER) + defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_DILITHIUM) static const 
char* bench_result_words2[][5] = { #ifdef BENCH_MICROSECOND { "ops took", "μsec" , "avg" , "ops/μsec", NULL }, /* 0 English @@ -2654,7 +2654,7 @@ static void bench_stats_sym_finish(const char* desc, int useDeviceID, #if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DH) || \ defined(HAVE_CURVE25519) || defined(HAVE_ED25519) || \ defined(HAVE_CURVE448) || defined(HAVE_ED448) || \ - defined(WOLFSSL_HAVE_KYBER) + defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_DILITHIUM) static void bench_stats_asym_finish_ex(const char* algo, int strength, const char* desc, const char* desc_extra, int useDeviceID, int count, double start, int ret) diff --git a/wolfcrypt/src/dilithium.c b/wolfcrypt/src/dilithium.c index 00575b803e..f3a6f01000 100644 --- a/wolfcrypt/src/dilithium.c +++ b/wolfcrypt/src/dilithium.c @@ -48,13 +48,16 @@ * Compiles in only the verification and public key operations. * WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM Default: OFF * Compiles verification implementation that uses smaller amounts of memory. - * WOLFSSL_DILITHIUM_VERIFY_NO_MALLOC + * WOLFSSL_DILITHIUM_VERIFY_NO_MALLOC Default: OFF * Only works with WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM. * Don't allocate memory with XMALLOC. Memory is pinned against key. - * WOLFSSL_DILITHIUM_ASSIGN_KEY + * WOLFSSL_DILITHIUM_ASSIGN_KEY Default: OFF * Key data is assigned into Dilithium key rather than copied. * Life of key data passed in is tightly coupled to life of Dilithium key. * Cannot be used when make key is enabled. + * WOLFSSL_DILITHIUM_SIGN_SMALL_MEM Default: OFF + * Compiles signature implementation that uses smaller amounts of memory but + * is considerably slower. * * WOLFSSL_DILITHIUM_ALIGNMENT Default: 8 * Use to indicate whether loading and storing of words needs to be aligned. 
@@ -540,6 +543,105 @@ static void dilthium_vec_encode_eta_bits(const sword32* s, byte d, byte eta, #endif /* !WOLFSSL_DILITHIUM_NO_MAKE_KEY */ #if !defined(WOLFSSL_DILITHIUM_NO_SIGN) || defined(WOLFSSL_DILITHIUM_CHECK_KEY) + +#if !defined(WOLFSSL_NO_ML_DSA_44) || !defined(WOLFSSL_NO_ML_DSA_87) +/* Decode polynomial with range -2..2. + * + * FIPS 204. 8.2: Algorithm 19 skDecode(sk) + * ... + * 5: for i from 0 to l - 1 do + * 6: s1[i] <- BitUnpack(yi, eta, eta) + * 7: end for + * ... + * OR + * ... + * 8: for i from 0 to k - 1 do + * 9: s2[i] <- BitUnpack(zi, eta, eta) + * 10: end for + * ... + * Where y and z are arrays of bit arrays. + * + * @param [in] p Buffer of data to decode. + * @param [out] s Decoded polynomial. + */ +static void dilithium_decode_eta_2_bits(const byte* p, sword32* s) +{ + unsigned int j; + + /* Step 6 or 9. + * 3 bits to encode each number. + * 8 numbers from 3 bytes. (8 * 3 bits = 3 * 8 bits) */ + for (j = 0; j < DILITHIUM_N; j += 8) { + /* Get 3 bits and put in range of -2..2. */ + s[j + 0] = 2 - ((p[0] >> 0) & 0x7 ); + s[j + 1] = 2 - ((p[0] >> 3) & 0x7 ); + s[j + 2] = 2 - ((p[0] >> 6) | ((p[1] << 2) & 0x7)); + s[j + 3] = 2 - ((p[1] >> 1) & 0x7 ); + s[j + 4] = 2 - ((p[1] >> 4) & 0x7 ); + s[j + 5] = 2 - ((p[1] >> 7) | ((p[2] << 1) & 0x7)); + s[j + 6] = 2 - ((p[2] >> 2) & 0x7 ); + s[j + 7] = 2 - ((p[2] >> 5) & 0x7 ); + /* Move to next place to decode from. */ + p += DILITHIUM_ETA_2_BITS; + } +} +#endif +#ifndef WOLFSSL_NO_ML_DSA_65 +/* Decode polynomial with range -4..4. + * + * FIPS 204. 8.2: Algorithm 19 skDecode(sk) + * ... + * 5: for i from 0 to l - 1 do + * 6: s1[i] <- BitUnpack(yi, eta, eta) + * 7: end for + * ... + * OR + * ... + * 8: for i from 0 to k - 1 do + * 9: s2[i] <- BitUnpack(zi, eta, eta) + * 10: end for + * ... + * Where y and z are arrays of bit arrays. + * + * @param [in] p Buffer of data to decode. + * @param [out] s Decoded polynomial. 
+ */ +static void dilithium_decode_eta_4_bits(const byte* p, sword32* s) +{ + unsigned int j; + +#ifdef WOLFSSL_DILITHIUM_SMALL + /* Step 6 or 9. + * 4 bits to encode each number. + * 2 numbers from 1 bytes. (2 * 4 bits = 1 * 8 bits) */ + for (j = 0; j < DILITHIUM_N / 2; j++) { + /* Get 4 bits and put in range of -4..4. */ + s[j * 2 + 0] = 4 - (p[j] & 0xf); + s[j * 2 + 1] = 4 - (p[j] >> 4); + } +#else + /* Step 6 or 9. + * 4 bits to encode each number. + * 8 numbers from 4 bytes. (8 * 4 bits = 4 * 8 bits) */ + for (j = 0; j < DILITHIUM_N / 2; j += 4) { + /* Get 4 bits and put in range of -4..4. */ + s[j * 2 + 0] = 4 - (p[j + 0] & 0xf); + s[j * 2 + 1] = 4 - (p[j + 0] >> 4); + s[j * 2 + 2] = 4 - (p[j + 1] & 0xf); + s[j * 2 + 3] = 4 - (p[j + 1] >> 4); + s[j * 2 + 4] = 4 - (p[j + 2] & 0xf); + s[j * 2 + 5] = 4 - (p[j + 2] >> 4); + s[j * 2 + 6] = 4 - (p[j + 3] & 0xf); + s[j * 2 + 7] = 4 - (p[j + 3] >> 4); + } +#endif /* WOLFSSL_DILITHIUM_SMALL */ +} +#endif + +#if defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \ + (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \ + (defined(WC_DILITHIUM_CACHE_PRIV_VECTORS) || \ + !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))) /* Decode vector of polynomials with range -ETA..ETA. * * FIPS 204. 8.2: Algorithm 19 skDecode(sk) @@ -565,29 +667,15 @@ static void dilithium_vec_decode_eta_bits(const byte* p, byte eta, sword32* s, byte d) { unsigned int i; - unsigned int j; #if !defined(WOLFSSL_NO_ML_DSA_44) || !defined(WOLFSSL_NO_ML_DSA_87) /* -2..2 */ if (eta == DILITHIUM_ETA_2) { /* Step 5 or 8: For each polynomial of vector */ for (i = 0; i < d; i++) { - /* Step 6 or 9. - * 3 bits to encode each number. - * 8 numbers from 3 bytes. (8 * 3 bits = 3 * 8 bits) */ - for (j = 0; j < DILITHIUM_N; j += 8) { - /* Get 3 bits and put in range of -2..2. 
*/ - s[j + 0] = 2 - ((p[0] >> 0) & 0x7 ); - s[j + 1] = 2 - ((p[0] >> 3) & 0x7 ); - s[j + 2] = 2 - ((p[0] >> 6) | ((p[1] << 2) & 0x7)); - s[j + 3] = 2 - ((p[1] >> 1) & 0x7 ); - s[j + 4] = 2 - ((p[1] >> 4) & 0x7 ); - s[j + 5] = 2 - ((p[1] >> 7) | ((p[2] << 1) & 0x7)); - s[j + 6] = 2 - ((p[2] >> 2) & 0x7 ); - s[j + 7] = 2 - ((p[2] >> 5) & 0x7 ); - /* Move to next place to decode from. */ - p += DILITHIUM_ETA_2_BITS; - } + dilithium_decode_eta_2_bits(p, s); + /* Move to next place to decode from. */ + p += DILITHIUM_ETA_2_BITS * DILITHIUM_N / 8; /* Next polynomial. */ s += DILITHIUM_N; } @@ -599,31 +687,7 @@ static void dilithium_vec_decode_eta_bits(const byte* p, byte eta, sword32* s, if (eta == DILITHIUM_ETA_4) { /* Step 5 or 8: For each polynomial of vector */ for (i = 0; i < d; i++) { - #ifdef WOLFSSL_DILITHIUM_SMALL - /* Step 6 or 9. - * 4 bits to encode each number. - * 2 numbers from 1 bytes. (2 * 4 bits = 1 * 8 bits) */ - for (j = 0; j < DILITHIUM_N / 2; j++) { - /* Get 4 bits and put in range of -4..4. */ - s[j * 2 + 0] = 4 - (p[j] & 0xf); - s[j * 2 + 1] = 4 - (p[j] >> 4); - } - #else - /* Step 6 or 9. - * 4 bits to encode each number. - * 8 numbers from 4 bytes. (8 * 4 bits = 4 * 8 bits) */ - for (j = 0; j < DILITHIUM_N / 2; j += 4) { - /* Get 4 bits and put in range of -4..4. */ - s[j * 2 + 0] = 4 - (p[j + 0] & 0xf); - s[j * 2 + 1] = 4 - (p[j + 0] >> 4); - s[j * 2 + 2] = 4 - (p[j + 1] & 0xf); - s[j * 2 + 3] = 4 - (p[j + 1] >> 4); - s[j * 2 + 4] = 4 - (p[j + 2] & 0xf); - s[j * 2 + 5] = 4 - (p[j + 2] >> 4); - s[j * 2 + 6] = 4 - (p[j + 3] & 0xf); - s[j * 2 + 7] = 4 - (p[j + 3] >> 4); - } - #endif + dilithium_decode_eta_4_bits(p, s); /* Move to next place to decode from. */ p += DILITHIUM_N / 2; /* Next polynomial. 
*/ @@ -635,6 +699,7 @@ static void dilithium_vec_decode_eta_bits(const byte* p, byte eta, sword32* s, { } } +#endif #endif /* !WOLFSSL_DILITHIUM_NO_SIGN || WOLFSSL_DILITHIUM_CHECK_KEY */ #ifndef WOLFSSL_DILITHIUM_NO_MAKE_KEY @@ -759,6 +824,86 @@ static void dilithium_vec_encode_t0_t1(sword32* t, byte d, byte* t0, byte* t1) #endif /* !WOLFSSL_DILITHIUM_NO_MAKE_KEY */ #if !defined(WOLFSSL_DILITHIUM_NO_SIGN) || defined(WOLFSSL_DILITHIUM_CHECK_KEY) +/* Decode bottom D bits of t as t0. + * + * FIPS 204. 8.2: Algorithm 19 skDecode(sk) + * ... + * 12: t0[i] <- BitUnpack(wi, 2^(d-1) - 1, 2^(d-1) + * ... + * + * @param [in] t0 Encoded values of t0. + * @param [in] d Dimensions of vector t0. + * @param [out] t Vector of polynomials. + */ +static void dilithium_decode_t0(const byte* t0, sword32* t) +{ + unsigned int j; + + /* Step 12. Get 13 bits and convert to range (2^(d-1)-1)..2^(d-1). */ + for (j = 0; j < DILITHIUM_N; j += 8) { + /* 13 bits used per number. + * 8 numbers from 13 bytes. (8 * 13 bits = 13 * 8 bits) */ +#if defined(LITTLE_ENDIAN_ORDER) && (WOLFSSL_DILITHIUM_ALIGNMENT == 0) + word32 t32_2 = ((const word32*)t0)[2]; + #ifdef WC_64BIT_CPU + word64 t64 = *(const word64*)t0; + t[j + 0] = DILITHIUM_D_MAX_HALF - ( t64 & 0x1fff); + t[j + 1] = DILITHIUM_D_MAX_HALF - ((t64 >> 13) & 0x1fff); + t[j + 2] = DILITHIUM_D_MAX_HALF - ((t64 >> 26) & 0x1fff); + t[j + 3] = DILITHIUM_D_MAX_HALF - ((t64 >> 39) & 0x1fff); + t[j + 4] = DILITHIUM_D_MAX_HALF - + ((t64 >> 52) | ((t32_2 & 0x0001) << 12)); + #else + word32 t32_0 = ((const word32*)t0)[0]; + word32 t32_1 = ((const word32*)t0)[1]; + t[j + 0] = DILITHIUM_D_MAX_HALF - + ( t32_0 & 0x1fff); + t[j + 1] = DILITHIUM_D_MAX_HALF - + ((t32_0 >> 13) & 0x1fff); + t[j + 2] = DILITHIUM_D_MAX_HALF - + (( t32_0 >> 26 ) | ((t32_1 & 0x007f) << 6)); + t[j + 3] = DILITHIUM_D_MAX_HALF - + ((t32_1 >> 7) & 0x1fff); + t[j + 4] = DILITHIUM_D_MAX_HALF - + (( t32_1 >> 20 ) | ((t32_2 & 0x0001) << 12)); + #endif + t[j + 5] = DILITHIUM_D_MAX_HALF - + 
((t32_2 >> 1) & 0x1fff); + t[j + 6] = DILITHIUM_D_MAX_HALF - + ((t32_2 >> 14) & 0x1fff); + t[j + 7] = DILITHIUM_D_MAX_HALF - + (( t32_2 >> 27 ) | ((word32)t0[12] ) << 5 ); +#else + t[j + 0] = DILITHIUM_D_MAX_HALF - + ((t0[ 0] ) | (((word16)(t0[ 1] & 0x1f)) << 8)); + t[j + 1] = DILITHIUM_D_MAX_HALF - + ((t0[ 1] >> 5) | (((word16)(t0[ 2] )) << 3) | + (((word16)(t0[ 3] & 0x03)) << 11)); + t[j + 2] = DILITHIUM_D_MAX_HALF - + ((t0[ 3] >> 2) | (((word16)(t0[ 4] & 0x7f)) << 6)); + t[j + 3] = DILITHIUM_D_MAX_HALF - + ((t0[ 4] >> 7) | (((word16)(t0[ 5] )) << 1) | + (((word16)(t0[ 6] & 0x0f)) << 9)); + t[j + 4] = DILITHIUM_D_MAX_HALF - + ((t0[ 6] >> 4) | (((word16)(t0[ 7] )) << 4) | + (((word16)(t0[ 8] & 0x01)) << 12)); + t[j + 5] = DILITHIUM_D_MAX_HALF - + ((t0[ 8] >> 1) | (((word16)(t0[ 9] & 0x3f)) << 7)); + t[j + 6] = DILITHIUM_D_MAX_HALF - + ((t0[ 9] >> 6) | (((word16)(t0[10] )) << 2) | + (((word16)(t0[11] & 0x07)) << 10)); + t[j + 7] = DILITHIUM_D_MAX_HALF - + ((t0[11] >> 3) | (((word16)(t0[12] )) << 5)); +#endif + /* Move to next place to decode from. */ + t0 += DILITHIUM_D; + } +} + +#if defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \ + (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \ + (defined(WC_DILITHIUM_CACHE_PRIV_VECTORS) || \ + !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))) /* Decode bottom D bits of t as t0. * * FIPS 204. 8.2: Algorithm 19 skDecode(sk) @@ -778,70 +923,13 @@ static void dilithium_vec_decode_t0(const byte* t0, byte d, sword32* t) /* Step 11. For each polynomial of vector. */ for (i = 0; i < d; i++) { - unsigned int j; - /* Step 12. Get 13 bits and convert to range (2^(d-1)-1)..2^(d-1). */ - for (j = 0; j < DILITHIUM_N; j += 8) { - /* 13 bits used per number. - * 8 numbers from 13 bytes. 
(8 * 13 bits = 13 * 8 bits) */ - #if defined(LITTLE_ENDIAN_ORDER) && (WOLFSSL_DILITHIUM_ALIGNMENT == 0) - word32 t32_2 = ((const word32*)t0)[2]; - #ifdef WC_64BIT_CPU - word64 t64 = *(const word64*)t0; - t[j + 0] = DILITHIUM_D_MAX_HALF - ( t64 & 0x1fff); - t[j + 1] = DILITHIUM_D_MAX_HALF - ((t64 >> 13) & 0x1fff); - t[j + 2] = DILITHIUM_D_MAX_HALF - ((t64 >> 26) & 0x1fff); - t[j + 3] = DILITHIUM_D_MAX_HALF - ((t64 >> 39) & 0x1fff); - t[j + 4] = DILITHIUM_D_MAX_HALF - - ((t64 >> 52) | ((t32_2 & 0x0001) << 12)); - #else - word32 t32_0 = ((const word32*)t0)[0]; - word32 t32_1 = ((const word32*)t0)[1]; - t[j + 0] = DILITHIUM_D_MAX_HALF - - ( t32_0 & 0x1fff); - t[j + 1] = DILITHIUM_D_MAX_HALF - - ((t32_0 >> 13) & 0x1fff); - t[j + 2] = DILITHIUM_D_MAX_HALF - - (( t32_0 >> 26 ) | ((t32_1 & 0x007f) << 6)); - t[j + 3] = DILITHIUM_D_MAX_HALF - - ((t32_1 >> 7) & 0x1fff); - t[j + 4] = DILITHIUM_D_MAX_HALF - - (( t32_1 >> 20 ) | ((t32_2 & 0x0001) << 12)); - #endif - t[j + 5] = DILITHIUM_D_MAX_HALF - - ((t32_2 >> 1) & 0x1fff); - t[j + 6] = DILITHIUM_D_MAX_HALF - - ((t32_2 >> 14) & 0x1fff); - t[j + 7] = DILITHIUM_D_MAX_HALF - - (( t32_2 >> 27 ) | ((word32)t0[12] ) << 5 ); - #else - t[j + 0] = DILITHIUM_D_MAX_HALF - - ((t0[ 0] ) | (((word16)(t0[ 1] & 0x1f)) << 8)); - t[j + 1] = DILITHIUM_D_MAX_HALF - - ((t0[ 1] >> 5) | (((word16)(t0[ 2] )) << 3) | - (((word16)(t0[ 3] & 0x03)) << 11)); - t[j + 2] = DILITHIUM_D_MAX_HALF - - ((t0[ 3] >> 2) | (((word16)(t0[ 4] & 0x7f)) << 6)); - t[j + 3] = DILITHIUM_D_MAX_HALF - - ((t0[ 4] >> 7) | (((word16)(t0[ 5] )) << 1) | - (((word16)(t0[ 6] & 0x0f)) << 9)); - t[j + 4] = DILITHIUM_D_MAX_HALF - - ((t0[ 6] >> 4) | (((word16)(t0[ 7] )) << 4) | - (((word16)(t0[ 8] & 0x01)) << 12)); - t[j + 5] = DILITHIUM_D_MAX_HALF - - ((t0[ 8] >> 1) | (((word16)(t0[ 9] & 0x3f)) << 7)); - t[j + 6] = DILITHIUM_D_MAX_HALF - - ((t0[ 9] >> 6) | (((word16)(t0[10] )) << 2) | - (((word16)(t0[11] & 0x07)) << 10)); - t[j + 7] = DILITHIUM_D_MAX_HALF - - ((t0[11] >> 3) | 
(((word16)(t0[12] )) << 5)); - #endif - /* Move to next place to decode from. */ - t0 += DILITHIUM_D; - } + dilithium_decode_t0(t0, t); + t0 += DILITHIUM_D * DILITHIUM_N / 8; /* Next polynomial. */ t += DILITHIUM_N; } } +#endif #endif /* !WOLFSSL_DILITHIUM_NO_SIGN || WOLFSSL_DILITHIUM_CHECK_KEY */ #if !defined(WOLFSSL_DILITHIUM_NO_VERIFY) || \ @@ -951,6 +1039,111 @@ static void dilithium_vec_decode_t1(const byte* t1, byte d, sword32* t) #ifndef WOLFSSL_DILITHIUM_NO_SIGN +#ifndef WOLFSSL_NO_ML_DSA_44 +/* Encode z with range of -(GAMMA1-1)...GAMMA1 + * + * FIPS 204. 8.2: Algorithm 20 sigEncode(c_tilde, z, h) + * ... + * 3: sigma <- sigma || BitPack(z[i], GAMMA1 - 1, GAMMA1) + * ... + * + * @param [in] z Polynomial to encode. + * @param [out] s Buffer to encode into. + */ +static void dilithium_encode_gamma1_17_bits(const sword32* z, byte* s) +{ + unsigned int j; + + /* Step 3. Get 18 bits as a number. */ + for (j = 0; j < DILITHIUM_N; j += 4) { + word32 z0 = DILITHIUM_GAMMA1_17 - z[j + 0]; + word32 z1 = DILITHIUM_GAMMA1_17 - z[j + 1]; + word32 z2 = DILITHIUM_GAMMA1_17 - z[j + 2]; + word32 z3 = DILITHIUM_GAMMA1_17 - z[j + 3]; + + /* 18 bits per number. + * 4 numbers become 9 bytes. (4 * 18 bits = 9 * 8 bits) */ +#if defined(LITTLE_ENDIAN_ORDER) && (WOLFSSL_DILITHIUM_ALIGNMENT == 0) + #ifdef WC_64BIT_CPU + word64* s64p = (word64*)s; + s64p[0] = z0 | ((word64)z1 << 18) | + ((word64)z2 << 36) | ((word64)z3 << 54); + #else + word32* s32p = (word32*)s; + s32p[0] = z0 | (z1 << 18) ; + s32p[1] = (z1 >> 14) | (z2 << 4) | (z3 << 22); + #endif +#else + s[0] = z0 ; + s[1] = z0 >> 8 ; + s[2] = (z0 >> 16) | (z1 << 2); + s[3] = z1 >> 6 ; + s[4] = (z1 >> 14) | (z2 << 4); + s[5] = z2 >> 4 ; + s[6] = (z2 >> 12) | (z3 << 6); + s[7] = z3 >> 2 ; +#endif + s[8] = z3 >> 10 ; + /* Move to next place to encode to. 
*/ + s += DILITHIUM_GAMMA1_17_ENC_BITS / 2; + } +} +#endif +#if !defined(WOLFSSL_NO_ML_DSA_65) || !defined(WOLFSSL_NO_ML_DSA_87) +/* Encode z with range of -(GAMMA1-1)...GAMMA1 + * + * FIPS 204. 8.2: Algorithm 20 sigEncode(c_tilde, z, h) + * ... + * 3: sigma <- sigma || BitPack(z[i], GAMMA1 - 1, GAMMA1) + * ... + * + * @param [in] z Polynomial to encode. + * @param [out] s Buffer to encode into. + */ +static void dilithium_encode_gamma1_19_bits(const sword32* z, byte* s) +{ + unsigned int j; + + /* Step 3. Get 20 bits as a number. */ + for (j = 0; j < DILITHIUM_N; j += 4) { + sword32 z0 = DILITHIUM_GAMMA1_19 - z[j + 0]; + sword32 z1 = DILITHIUM_GAMMA1_19 - z[j + 1]; + sword32 z2 = DILITHIUM_GAMMA1_19 - z[j + 2]; + sword32 z3 = DILITHIUM_GAMMA1_19 - z[j + 3]; + + /* 20 bits per number. + * 4 numbers become 10 bytes. (4 * 20 bits = 10 * 8 bits) */ +#if defined(LITTLE_ENDIAN_ORDER) && (WOLFSSL_DILITHIUM_ALIGNMENT <= 2) + word16* s16p = (word16*)s; + #ifdef WC_64BIT_CPU + word64* s64p = (word64*)s; + s64p[0] = z0 | ((word64)z1 << 20) | + ((word64)z2 << 40) | ((word64)z3 << 60); + #else + word32* s32p = (word32*)s; + s32p[0] = z0 | (z1 << 20) ; + s32p[1] = (z1 >> 12) | (z2 << 8) | (z3 << 28); + #endif + s16p[4] = (z3 >> 4) ; +#else + s[0] = z0 ; + s[1] = (z0 >> 8) ; + s[2] = (z0 >> 16) | (z1 << 4); + s[3] = (z1 >> 4) ; + s[4] = (z1 >> 12) ; + s[5] = z2 ; + s[6] = (z2 >> 8) ; + s[7] = (z2 >> 16) | (z3 << 4); + s[8] = (z3 >> 4) ; + s[9] = (z3 >> 12) ; +#endif + /* Move to next place to encode to. */ + s += DILITHIUM_GAMMA1_19_ENC_BITS / 2; + } +} +#endif + +#ifndef WOLFSSL_DILITHIUM_SIGN_SMALL_MEM /* Encode z with range of -(GAMMA1-1)...GAMMA1 * * FIPS 204. 
8.2: Algorithm 20 sigEncode(c_tilde, z, h) @@ -969,7 +1162,6 @@ static void dilithium_vec_encode_gamma1(const sword32* z, byte l, int bits, byte* s) { unsigned int i; - unsigned int j; (void)l; @@ -977,39 +1169,9 @@ static void dilithium_vec_encode_gamma1(const sword32* z, byte l, int bits, if (bits == DILITHIUM_GAMMA1_BITS_17) { /* Step 2. For each polynomial of vector. */ for (i = 0; i < PARAMS_ML_DSA_44_L; i++) { - /* Step 3. Get 18 bits as a number. */ - for (j = 0; j < DILITHIUM_N; j += 4) { - word32 z0 = DILITHIUM_GAMMA1_17 - z[j + 0]; - word32 z1 = DILITHIUM_GAMMA1_17 - z[j + 1]; - word32 z2 = DILITHIUM_GAMMA1_17 - z[j + 2]; - word32 z3 = DILITHIUM_GAMMA1_17 - z[j + 3]; - - /* 18 bits per number. - * 8 numbers become 9 bytes. (8 * 9 bits = 9 * 8 bits) */ - #if defined(LITTLE_ENDIAN_ORDER) && (WOLFSSL_DILITHIUM_ALIGNMENT == 0) - #ifdef WC_64BIT_CPU - word64* s64p = (word64*)s; - s64p[0] = z0 | ((word64)z1 << 18) | - ((word64)z2 << 36) | ((word64)z3 << 54); - #else - word32* s32p = (word32*)s; - s32p[0] = z0 | (z1 << 18) ; - s32p[1] = (z1 >> 14) | (z2 << 4) | (z3 << 22); - #endif - #else - s[0] = z0 ; - s[1] = z0 >> 8 ; - s[2] = (z0 >> 16) | (z1 << 2); - s[3] = z1 >> 6 ; - s[4] = (z1 >> 14) | (z2 << 4); - s[5] = z2 >> 4 ; - s[6] = (z2 >> 12) | (z3 << 6); - s[7] = z3 >> 2 ; - #endif - s[8] = z3 >> 10 ; - /* Move to next place to encode to. */ - s += DILITHIUM_GAMMA1_17_ENC_BITS / 2; - } + dilithium_encode_gamma1_17_bits(z, s); + /* Move to next place to encode to. */ + s += DILITHIUM_GAMMA1_17_ENC_BITS / 2 * DILITHIUM_N / 4; /* Next polynomial. */ z += DILITHIUM_N; } @@ -1020,42 +1182,9 @@ static void dilithium_vec_encode_gamma1(const sword32* z, byte l, int bits, if (bits == DILITHIUM_GAMMA1_BITS_19) { /* Step 2. For each polynomial of vector. */ for (i = 0; i < l; i++) { - /* Step 3. Get 20 bits as a number. 
*/ - for (j = 0; j < DILITHIUM_N; j += 4) { - sword32 z0 = DILITHIUM_GAMMA1_19 - z[j + 0]; - sword32 z1 = DILITHIUM_GAMMA1_19 - z[j + 1]; - sword32 z2 = DILITHIUM_GAMMA1_19 - z[j + 2]; - sword32 z3 = DILITHIUM_GAMMA1_19 - z[j + 3]; - - /* 20 bits per number. - * 4 numbers become 10 bytes. (4 * 20 bits = 10 * 8 bits) */ - #if defined(LITTLE_ENDIAN_ORDER) && (WOLFSSL_DILITHIUM_ALIGNMENT <= 2) - word16* s16p = (word16*)s; - #ifdef WC_64BIT_CPU - word64* s64p = (word64*)s; - s64p[0] = z0 | ((word64)z1 << 20) | - ((word64)z2 << 40) | ((word64)z3 << 60); - #else - word32* s32p = (word32*)s; - s32p[0] = z0 | (z1 << 20) ; - s32p[1] = (z1 >> 12) | (z2 << 8) | (z3 << 28); - #endif - s16p[4] = (z3 >> 4) ; - #else - s[0] = z0 ; - s[1] = (z0 >> 8) ; - s[2] = (z0 >> 16) | (z1 << 4); - s[3] = (z1 >> 4) ; - s[4] = (z1 >> 12) ; - s[5] = z2 ; - s[6] = (z2 >> 8) ; - s[7] = (z2 >> 16) | (z3 << 4); - s[8] = (z3 >> 4) ; - s[9] = (z3 >> 12) ; - #endif - /* Move to next place to encode to. */ - s += DILITHIUM_GAMMA1_19_ENC_BITS / 2; - } + dilithium_encode_gamma1_19_bits(z, s); + /* Move to next place to encode to. */ + s += DILITHIUM_GAMMA1_19_ENC_BITS / 2 * DILITHIUM_N / 4; /* Next polynomial. */ z += DILITHIUM_N; } @@ -1065,6 +1194,7 @@ static void dilithium_vec_encode_gamma1(const sword32* z, byte l, int bits, { } } +#endif /* WOLFSSL_DILITHIUM_SIGN_SMALL_MEM */ #endif /* !WOLFSSL_DILITHIUM_NO_SIGN */ @@ -1830,8 +1960,13 @@ static int dilithium_rej_ntt_poly(wc_Shake* shake128, byte* seed, sword32* a, #endif } -#if !defined(WOLFSSL_DILITHIUM_VERIFY_ONLY) || \ - !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM) +#if !defined(WOLFSSL_DILITHIUM_NO_MAKE_KEY) || \ + defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \ + (!defined(WOLFSSL_DILITHIUM_NO_VERIFY) && \ + !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM)) || \ + (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \ + (!defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM) || \ + defined(WC_DILITHIUM_CACHE_MATRIX_A))) /* Expand the seed to create matrix a. * * FIPS 204. 
8.3: Algorithm 26 ExpandA(rho) @@ -2349,8 +2484,8 @@ static int dilithium_expand_s(wc_Shake* shake256, byte* priv_seed, byte eta, * @return 0 on success. * @return Negative on hash error. */ -static int dilithium_expand_mask(wc_Shake* shake256, byte* seed, word16 kappa, - byte gamma1_bits, sword32* y, byte l) +static int dilithium_vec_expand_mask(wc_Shake* shake256, byte* seed, + word16 kappa, byte gamma1_bits, sword32* y, byte l) { int ret = 0; byte r; @@ -2599,6 +2734,7 @@ static void dilithium_decompose_q32(sword32 r, sword32* r0, sword32* r1) #ifndef WOLFSSL_DILITHIUM_NO_SIGN +#ifndef WOLFSSL_DILITHIUM_SIGN_SMALL_MEM /* Decompose vector of polynomials into high and low based on GAMMA2. * * @param [in] r Vector of polynomials to decompose. @@ -2652,6 +2788,7 @@ static void dilithium_vec_decompose(const sword32* r, byte k, sword32 gamma2, { } } +#endif #endif /* !WOLFSSL_DILITHIUM_NO_SIGN */ @@ -2660,6 +2797,38 @@ static void dilithium_vec_decompose(const sword32* r, byte k, sword32 gamma2, ******************************************************************************/ #if !defined(WOLFSSL_DILITHIUM_NO_SIGN) || !defined(WOLFSSL_DILITHIUM_NO_VERIFY) +/* Check that the values of the polynomial are in range. + * + * Many places in FIPS 204. One example from Algorithm 2: + * 23: if ||z||inf >= GAMMA1 - BETA or ..., then (z, h) = falsam + * + * @param [in] a Polynomial. + * @param [in] hi Largest value in range. + */ +static int dilithium_check_low(const sword32* a, sword32 hi) +{ + int ret = 1; + unsigned int j; + /* Calculate lowest range value. */ + sword32 nhi = -hi; + + /* For each value of polynomial. */ + for (j = 0; j < DILITHIUM_N; j++) { + /* Check range is -(hi-1)..(hi-1). */ + if ((a[j] <= nhi) || (a[j] >= hi)) { + /* Check failed. 
*/ + ret = 0; + break; + } + } + + return ret; +} + +#if (!defined(WOLFSSL_DILITHIUM_NO_VERIFY) && \ + !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM)) || \ + (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \ + !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM)) /* Check that the values of the vector are in range. * * Many places in FIPS 204. One example from Algorithm 2: @@ -2669,24 +2838,16 @@ static void dilithium_vec_decompose(const sword32* r, byte k, sword32 gamma2, * @param [in] l Dimension of vector. * @param [in] hi Largest value in range. */ -static int dilithium_check_low(const sword32* a, byte l, sword32 hi) +static int dilithium_vec_check_low(const sword32* a, byte l, sword32 hi) { int ret = 1; unsigned int i; - /* Calculate lowest range value. */ - sword32 nhi = -hi; /* For each polynomial of vector. */ for (i = 0; (ret == 1) && (i < l); i++) { - unsigned int j; - /* For each value of polynomial. */ - for (j = 0; j < DILITHIUM_N; j++) { - /* Check range is -(hi-1)..(hi-1). */ - if ((a[j] <= nhi) || (a[j] >= hi)) { - /* Check failed. */ - ret = 0; - break; - } + ret = dilithium_check_low(a, hi); + if (ret == 0) { + break; } /* Next polynomial. */ a += DILITHIUM_N; @@ -2695,6 +2856,7 @@ static int dilithium_check_low(const sword32* a, byte l, sword32 hi) return ret; } #endif +#endif /****************************************************************************** * Hint operations @@ -2702,6 +2864,7 @@ static int dilithium_check_low(const sword32* a, byte l, sword32 hi) #ifndef WOLFSSL_DILITHIUM_NO_SIGN +#ifndef WOLFSSL_NO_ML_DSA_44 /* Compute hints indicating whether adding ct0 to w alters high bits of w. * * FIPS 204. 6: Algorithm 2 ML-DSA.Sign(sk, M) @@ -2725,102 +2888,209 @@ static int dilithium_check_low(const sword32* a, byte l, sword32 hi) * * FIPS 204. 8.1: Algorithm 14 HintBitPack(h) * ... - * 2: Index <- 0 - * 3. 
for i from 0 to k - 1 do * 4: for j from 0 to 255 do * 5: if h[i]j != 0 then * 6: y[Index] <- j * 7: Index <- Index + 1 * 8: end if * 9: end for - * 10: y[OMEGA + i] <- Index - * 11: end for - * 12: return y + * ... * - * @param [in] s Vector of polynomials that is sum of ct0 and w0. - * @param [in] w1 Vector of polynomials that is high part of w. - * @param [in] k Dimension of vectors. - * @param [in] gamma2 Low-order rounding range, GAMMA2. - * @param [in] omega Maximum number of hints allowed. - * @param [out] h Encoded hints. + * @param [in] s Vector of polynomials that is sum of ct0 and w0. + * @param [in] w1 Vector of polynomials that is high part of w. + * @param [out] h Encoded hints. + * @param [in, out] idxp Index to write next hint into. * return Number of hints on success. * return Falsam of -1 when too many hints. */ -static int dilithium_make_hint(const sword32* s, const sword32* w1, byte k, - word32 gamma2, byte omega, byte* h) +static int dilithium_make_hint_88(const sword32* s, const sword32* w1, byte* h, + byte *idxp) { - unsigned int i; unsigned int j; - byte idx = 0; - - (void)k; - (void)omega; + byte idx = *idxp; -#ifndef WOLFSSL_NO_ML_DSA_44 - if (gamma2 == DILITHIUM_Q_LOW_88) { - /* Alg 14, Step 2: For each polynomial of vector. */ - for (i = 0; i < PARAMS_ML_DSA_44_K; i++) { - /* Alg 14, Step 3: For each value of polynomial. */ - for (j = 0; j < DILITHIUM_N; j++) { - /* Alg 14, Step 4: Check whether hint is required. - * Did sum end up greater than low modulus or - * sum end up less than the negative of low modulus or - * sum is the negative of the low modulus and w1 is not zero, - * then w1 will be modified. - */ - if ((s[j] > (sword32)DILITHIUM_Q_LOW_88) || - (s[j] < -(sword32)DILITHIUM_Q_LOW_88) || - ((s[j] == -(sword32)DILITHIUM_Q_LOW_88) && - (w1[j] != 0))) { - /* Alg 14, Step 6, 7: Put index as hint modifier. */ - h[idx++] = (byte)j; - /* Alg 2, Step 27: If there are too many hints, return - * falsam of -1. 
*/ - if (idx > PARAMS_ML_DSA_44_OMEGA) { - return -1; - } - } + /* Alg 14, Step 3: For each value of polynomial. */ + for (j = 0; j < DILITHIUM_N; j++) { + /* Alg 14, Step 4: Check whether hint is required. + * Did sum end up greater than low modulus or + * sum end up less than the negative of low modulus or + * sum is the negative of the low modulus and w1 is not zero, + * then w1 will be modified. + */ + if ((s[j] > (sword32)DILITHIUM_Q_LOW_88) || + (s[j] < -(sword32)DILITHIUM_Q_LOW_88) || + ((s[j] == -(sword32)DILITHIUM_Q_LOW_88) && + (w1[j] != 0))) { + /* Alg 14, Step 6, 7: Put index as hint modifier. */ + h[idx++] = (byte)j; + /* Alg 2, Step 27: If there are too many hints, return + * falsam of -1. */ + if (idx > PARAMS_ML_DSA_44_OMEGA) { + return -1; } - /* Alg 14, Step 10: Store count of hints for polynomial at end of - * list. */ - h[omega + i] = idx; - - /* Next polynomial. */ - s += DILITHIUM_N; - w1 += DILITHIUM_N; } } - else + + *idxp = idx; + return 0; +} #endif #if !defined(WOLFSSL_NO_ML_DSA_65) || !defined(WOLFSSL_NO_ML_DSA_87) - if (gamma2 == DILITHIUM_Q_LOW_32) { - /* Alg 14, Step 2: For each polynomial of vector. */ - for (i = 0; i < k; i++) { - /* Alg 14, Step 3: For each value of polynomial. */ - for (j = 0; j < DILITHIUM_N; j++) { - /* Alg 14, Step 4: Check whether hint is required. - * Did sum end up greater than low modulus or - * sum end up less than the negative of low modulus or - * sum is the negative of the low modulus and w1 is not zero, - * then w1 will be modified. - */ - if ((s[j] > (sword32)DILITHIUM_Q_LOW_32) || - (s[j] < -(sword32)DILITHIUM_Q_LOW_32) || - ((s[j] == -(sword32)DILITHIUM_Q_LOW_32) && - (w1[j] != 0))) { - /* Alg 14, Step 6, 7: Put index as hint modifier. */ - h[idx++] = (byte)j; - /* Alg 2, Step 27: If there are too many hints, return - * falsam of -1. */ - if (idx > omega) { - return -1; - } - } - } +/* Compute hints indicating whether adding ct0 to w alters high bits of w. + * + * FIPS 204. 
6: Algorithm 2 ML-DSA.Sign(sk, M) + * ... + * 26: h <- MakeHint(-<<ct0>>, w - <<cs2>> + <<ct0>>) + * 27: if ... or the number of 1's in h is greater than OMEGA, then + * (z, h) <- falsam + * ... + * 32: sigma <- sigEncode(c_tilde, z mod+/- q, h) + * ... + * + * FIPS 204. 8.4: Algorithm 33 MakeHint(z, r) + * 1: r1 <- HighBits(r) + * 2: v1 <- HighBits(r+z) + * 3: return [[r1 != v1]] + * + * FIPS 204. 8.2: Algorithm 20 sigEncode(c_tilde, z, h) + * ... + * 5: sigma <- sigma || HintBitPack(h) + * ... + * + * FIPS 204. 8.1: Algorithm 14 HintBitPack(h) + * ... + * 4: for j from 0 to 255 do + * 5: if h[i]j != 0 then + * 6: y[Index] <- j + * 7: Index <- Index + 1 + * 8: end if + * 9: end for + * ... + * + * @param [in] s Polynomial that is sum of ct0 and w0. + * @param [in] w1 Polynomial that is high part of w. + * @param [in] omega Maximum number of hints allowed. + * @param [out] h Encoded hints. + * @param [in, out] idxp Index to write next hint into. + * return 0 on success, with the index of the next hint stored in idxp. + * return Falsam of -1 when too many hints. + */ +static int dilithium_make_hint_32(const sword32* s, const sword32* w1, + byte omega, byte* h, byte *idxp) +{ + unsigned int j; + byte idx = *idxp; + + (void)omega; + + /* Alg 14, Step 3: For each value of polynomial. */ + for (j = 0; j < DILITHIUM_N; j++) { + /* Alg 14, Step 4: Check whether hint is required. + * Did sum end up greater than low modulus or + * sum end up less than the negative of low modulus or + * sum is the negative of the low modulus and w1 is not zero, + * then w1 will be modified. + */ + if ((s[j] > (sword32)DILITHIUM_Q_LOW_32) || + (s[j] < -(sword32)DILITHIUM_Q_LOW_32) || + ((s[j] == -(sword32)DILITHIUM_Q_LOW_32) && + (w1[j] != 0))) { + /* Alg 14, Step 6, 7: Put index as hint modifier. */ + h[idx++] = (byte)j; + /* Alg 2, Step 27: If there are too many hints, return + * falsam of -1. 
*/ + if (idx > omega) { + return -1; + } + } + } + + *idxp = idx; + return 0; +} +#endif + +#ifndef WOLFSSL_DILITHIUM_SIGN_SMALL_MEM +/* Compute hints indicating whether adding ct0 to w alters high bits of w. + * + * FIPS 204. 6: Algorithm 2 ML-DSA.Sign(sk, M) + * ... + * 26: h <- MakeHint(-<<ct0>>, w - <<cs2>> + <<ct0>>) + * 27: if ... or the number of 1's in h is greater than OMEGA, then + * (z, h) <- falsam + * ... + * 32: sigma <- sigEncode(c_tilde, z mod+/- q, h) + * ... + * + * FIPS 204. 8.4: Algorithm 33 MakeHint(z, r) + * 1: r1 <- HighBits(r) + * 2: v1 <- HighBits(r+z) + * 3: return [[r1 != v1]] + * + * FIPS 204. 8.2: Algorithm 20 sigEncode(c_tilde, z, h) + * ... + * 5: sigma <- sigma || HintBitPack(h) + * ... + * + * FIPS 204. 8.1: Algorithm 14 HintBitPack(h) + * ... + * 2: Index <- 0 + * 3: for i from 0 to k - 1 do + * 4: for j from 0 to 255 do + * 5: if h[i]j != 0 then + * 6: y[Index] <- j + * 7: Index <- Index + 1 + * 8: end if + * 9: end for + * 10: y[OMEGA + i] <- Index + * 11: end for + * 12: return y + * + * @param [in] s Vector of polynomials that is sum of ct0 and w0. + * @param [in] w1 Vector of polynomials that is high part of w. + * @param [in] k Dimension of vectors. + * @param [in] gamma2 Low-order rounding range, GAMMA2. + * @param [in] omega Maximum number of hints allowed. + * @param [out] h Encoded hints. + * return Number of hints on success. + * return Falsam of -1 when too many hints. + */ +static int dilithium_make_hint(const sword32* s, const sword32* w1, byte k, + word32 gamma2, byte omega, byte* h) +{ + unsigned int i; + byte idx = 0; + + (void)k; + (void)omega; + +#ifndef WOLFSSL_NO_ML_DSA_44 + if (gamma2 == DILITHIUM_Q_LOW_88) { + /* Alg 14, Step 2: For each polynomial of vector. */ + for (i = 0; i < PARAMS_ML_DSA_44_K; i++) { + if (dilithium_make_hint_88(s, w1, h, &idx) == -1) { + return -1; + } + /* Alg 14, Step 10: Store count of hints for polynomial at end of + * list. */ + h[PARAMS_ML_DSA_44_OMEGA + i] = idx; + /* Next polynomial. 
*/ + s += DILITHIUM_N; + w1 += DILITHIUM_N; + } + } + else +#endif +#if !defined(WOLFSSL_NO_ML_DSA_65) || !defined(WOLFSSL_NO_ML_DSA_87) + if (gamma2 == DILITHIUM_Q_LOW_32) { + /* Alg 14, Step 2: For each polynomial of vector. */ + for (i = 0; i < k; i++) { + if (dilithium_make_hint_32(s, w1, omega, h, &idx) == -1) { + return -1; + } /* Alg 14, Step 10: Store count of hints for polynomial at end of * list. */ h[omega + i] = idx; - /* Next polynomial. */ s += DILITHIUM_N; w1 += DILITHIUM_N; @@ -2835,6 +3105,7 @@ static int dilithium_make_hint(const sword32* s, const sword32* w1, byte k, XMEMSET(h + idx, 0, omega - idx); return idx; } +#endif /* !WOLFSSL_DILITHIUM_SIGN_SMALL_MEM */ #endif /* !WOLFSSL_DILITHIUM_NO_SIGN */ @@ -3595,6 +3866,11 @@ static void dilithium_ntt(sword32* r) #endif } +#if !defined(WOLFSSL_DILITHIUM_NO_VERIFY) || \ + defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \ + (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \ + (defined(WC_DILITHIUM_CACHE_PRIV_VECTORS) || \ + !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))) /* Number-Theoretic Transform. * * @param [in, out] r Vector of polynomials to transform. @@ -3610,6 +3886,7 @@ static void dilithium_vec_ntt(sword32* r, byte l) } } #endif +#endif #ifndef WOLFSSL_DILITHIUM_SMALL @@ -3956,7 +4233,11 @@ static void dilithium_ntt_small(sword32* r) #endif } -#ifndef WOLFSSL_DILITHIUM_VERIFY_ONLY +#if !defined(WOLFSSL_DILITHIUM_NO_MAKE_KEY) || \ + defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \ + (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \ + (defined(WC_DILITHIUM_CACHE_PRIV_VECTORS) || \ + !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))) /* Number-Theoretic Transform with small initial values. * * @param [in, out] r Vector of polynomials to transform. 
@@ -4409,8 +4690,12 @@ static void dilithium_invntt(sword32* r)
 }
 
 
-#if !defined(WOLFSSL_DILITHIUM_VERIFY_ONLY) || \
-    !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM)
+#if !defined(WOLFSSL_DILITHIUM_NO_MAKE_KEY) || \
+     defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_VERIFY) && \
+     !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM)) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \
+     !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))
 /* Inverse Number-Theoretic Transform.
  *
  * @param  [in, out]  r  Vector of polynomials to transform.
@@ -4427,8 +4712,12 @@ static void dilithium_vec_invntt(sword32* r, byte l)
 }
 #endif
 
-#if !defined(WOLFSSL_DILITHIUM_VERIFY_ONLY) || \
-    !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM)
+#if !defined(WOLFSSL_DILITHIUM_NO_MAKE_KEY) || \
+     defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_VERIFY) && \
+     !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM)) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \
+     !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))
 /* Matrix multiplication.
  *
  * @param  [out]  r  Vector of polynomials that is result.
@@ -4600,6 +4889,56 @@ static void dilithium_matrix_mul(sword32* r, const sword32* m, const sword32* v,
 #if !defined(WOLFSSL_DILITHIUM_NO_SIGN) || \
     (!defined(WOLFSSL_DILITHIUM_NO_VERIFY) && \
      !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM))
+/* Point-wise Montgomery multiplication of two polynomials. r[e] = a[e] * b[e].
+ *
+ * @param  [out]  r  Polynomial result. May be the same buffer as a or b.
+ * @param  [in]   a  Polynomial to multiply. Not modified.
+ * @param  [in]   b  Polynomial to multiply by. Not modified.
+ */
+static void dilithium_mul(sword32* r, const sword32* a, const sword32* b)
+{
+    unsigned int e;
+#ifdef WOLFSSL_DILITHIUM_SMALL
+    for (e = 0; e < DILITHIUM_N; e++) {
+        r[e] = dilithium_mont_red((sword64)a[e] * b[e]);
+    }
+#elif defined(WOLFSSL_DILITHIUM_NO_LARGE_CODE)
+    for (e = 0; e < DILITHIUM_N; e += 8) {
+        r[e+0] = dilithium_mont_red((sword64)a[e+0] * b[e+0]);
+        r[e+1] = dilithium_mont_red((sword64)a[e+1] * b[e+1]);
+        r[e+2] = dilithium_mont_red((sword64)a[e+2] * b[e+2]);
+        r[e+3] = dilithium_mont_red((sword64)a[e+3] * b[e+3]);
+        r[e+4] = dilithium_mont_red((sword64)a[e+4] * b[e+4]);
+        r[e+5] = dilithium_mont_red((sword64)a[e+5] * b[e+5]);
+        r[e+6] = dilithium_mont_red((sword64)a[e+6] * b[e+6]);
+        r[e+7] = dilithium_mont_red((sword64)a[e+7] * b[e+7]);
+    }
+#else
+    for (e = 0; e < DILITHIUM_N; e += 16) {
+        r[e+ 0] = dilithium_mont_red((sword64)a[e+ 0] * b[e+ 0]);
+        r[e+ 1] = dilithium_mont_red((sword64)a[e+ 1] * b[e+ 1]);
+        r[e+ 2] = dilithium_mont_red((sword64)a[e+ 2] * b[e+ 2]);
+        r[e+ 3] = dilithium_mont_red((sword64)a[e+ 3] * b[e+ 3]);
+        r[e+ 4] = dilithium_mont_red((sword64)a[e+ 4] * b[e+ 4]);
+        r[e+ 5] = dilithium_mont_red((sword64)a[e+ 5] * b[e+ 5]);
+        r[e+ 6] = dilithium_mont_red((sword64)a[e+ 6] * b[e+ 6]);
+        r[e+ 7] = dilithium_mont_red((sword64)a[e+ 7] * b[e+ 7]);
+        r[e+ 8] = dilithium_mont_red((sword64)a[e+ 8] * b[e+ 8]);
+        r[e+ 9] = dilithium_mont_red((sword64)a[e+ 9] * b[e+ 9]);
+        r[e+10] = dilithium_mont_red((sword64)a[e+10] * b[e+10]);
+        r[e+11] = dilithium_mont_red((sword64)a[e+11] * b[e+11]);
+        r[e+12] = dilithium_mont_red((sword64)a[e+12] * b[e+12]);
+        r[e+13] = dilithium_mont_red((sword64)a[e+13] * b[e+13]);
+        r[e+14] = dilithium_mont_red((sword64)a[e+14] * b[e+14]);
+        r[e+15] = dilithium_mont_red((sword64)a[e+15] * b[e+15]);
+    }
+#endif
+}
+
+#if (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \
+     !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM)) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_VERIFY) && \
+     !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM))
 /* Vector
multiplication. * * @param [out] r Vector of polynomials that is result. @@ -4612,49 +4951,41 @@ static void dilithium_vec_mul(sword32* r, sword32* a, sword32* b, byte l) byte i; for (i = 0; i < l; i++) { - unsigned int e; -#ifdef WOLFSSL_DILITHIUM_SMALL - for (e = 0; e < DILITHIUM_N; e++) { - r[e] = dilithium_mont_red((sword64)a[e] * b[e]); - } -#elif defined(WOLFSSL_DILITHIUM_NO_LARGE_CODE) - for (e = 0; e < DILITHIUM_N; e += 8) { - r[e+0] = dilithium_mont_red((sword64)a[e+0] * b[e+0]); - r[e+1] = dilithium_mont_red((sword64)a[e+1] * b[e+1]); - r[e+2] = dilithium_mont_red((sword64)a[e+2] * b[e+2]); - r[e+3] = dilithium_mont_red((sword64)a[e+3] * b[e+3]); - r[e+4] = dilithium_mont_red((sword64)a[e+4] * b[e+4]); - r[e+5] = dilithium_mont_red((sword64)a[e+5] * b[e+5]); - r[e+6] = dilithium_mont_red((sword64)a[e+6] * b[e+6]); - r[e+7] = dilithium_mont_red((sword64)a[e+7] * b[e+7]); - } -#else - for (e = 0; e < DILITHIUM_N; e += 16) { - r[e+ 0] = dilithium_mont_red((sword64)a[e+ 0] * b[e+ 0]); - r[e+ 1] = dilithium_mont_red((sword64)a[e+ 1] * b[e+ 1]); - r[e+ 2] = dilithium_mont_red((sword64)a[e+ 2] * b[e+ 2]); - r[e+ 3] = dilithium_mont_red((sword64)a[e+ 3] * b[e+ 3]); - r[e+ 4] = dilithium_mont_red((sword64)a[e+ 4] * b[e+ 4]); - r[e+ 5] = dilithium_mont_red((sword64)a[e+ 5] * b[e+ 5]); - r[e+ 6] = dilithium_mont_red((sword64)a[e+ 6] * b[e+ 6]); - r[e+ 7] = dilithium_mont_red((sword64)a[e+ 7] * b[e+ 7]); - r[e+ 8] = dilithium_mont_red((sword64)a[e+ 8] * b[e+ 8]); - r[e+ 9] = dilithium_mont_red((sword64)a[e+ 9] * b[e+ 9]); - r[e+10] = dilithium_mont_red((sword64)a[e+10] * b[e+10]); - r[e+11] = dilithium_mont_red((sword64)a[e+11] * b[e+11]); - r[e+12] = dilithium_mont_red((sword64)a[e+12] * b[e+12]); - r[e+13] = dilithium_mont_red((sword64)a[e+13] * b[e+13]); - r[e+14] = dilithium_mont_red((sword64)a[e+14] * b[e+14]); - r[e+15] = dilithium_mont_red((sword64)a[e+15] * b[e+15]); - } -#endif + dilithium_mul(r, a, b); r += DILITHIUM_N; b += DILITHIUM_N; } } #endif +#endif 
#ifndef WOLFSSL_DILITHIUM_NO_SIGN
+/* Modulo reduce each value of polynomial in place. Range (-2^31)..(2^31-1).
+ *
+ * @param  [in, out]  a  Polynomial.
+ */
+static void dilithium_poly_red(sword32* a)
+{
+    word16 j;
+#ifdef WOLFSSL_DILITHIUM_SMALL
+    for (j = 0; j < DILITHIUM_N; j++) {
+        a[j] = dilithium_red(a[j]);
+    }
+#else
+    for (j = 0; j < DILITHIUM_N; j += 8) {
+        a[j+0] = dilithium_red(a[j+0]);
+        a[j+1] = dilithium_red(a[j+1]);
+        a[j+2] = dilithium_red(a[j+2]);
+        a[j+3] = dilithium_red(a[j+3]);
+        a[j+4] = dilithium_red(a[j+4]);
+        a[j+5] = dilithium_red(a[j+5]);
+        a[j+6] = dilithium_red(a[j+6]);
+        a[j+7] = dilithium_red(a[j+7]);
+    }
+#endif
+}
+
+#ifndef WOLFSSL_DILITHIUM_SIGN_SMALL_MEM
 /* Modulo reduce values in polynomials of vector. Range (-2^31)..(2^31-1).
  *
  * @param  [in, out]  a  Vector of polynomials.
@@ -4665,32 +4996,48 @@ static void dilithium_vec_red(sword32* a, byte l)
     byte i;
 
     for (i = 0; i < l; i++) {
-        word16 j;
-#ifdef WOLFSSL_DILITHIUM_SMALL
-        for (j = 0; j < DILITHIUM_N; j++) {
-            a[j] = dilithium_red(a[j]);
-        }
-#else
-        for (j = 0; j < DILITHIUM_N; j += 8) {
-            a[j+0] = dilithium_red(a[j+0]);
-            a[j+1] = dilithium_red(a[j+1]);
-            a[j+2] = dilithium_red(a[j+2]);
-            a[j+3] = dilithium_red(a[j+3]);
-            a[j+4] = dilithium_red(a[j+4]);
-            a[j+5] = dilithium_red(a[j+5]);
-            a[j+6] = dilithium_red(a[j+6]);
-            a[j+7] = dilithium_red(a[j+7]);
-        }
-#endif
+        dilithium_poly_red(a);
         a += DILITHIUM_N;
     }
 }
+#endif /* !WOLFSSL_DILITHIUM_SIGN_SMALL_MEM */
 #endif /* !WOLFSSL_DILITHIUM_NO_SIGN */
 
 #if (!defined(WOLFSSL_DILITHIUM_NO_SIGN) || \
      (!defined(WOLFSSL_DILITHIUM_NO_VERIFY) && \
       !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM))) || \
     defined(WOLFSSL_DILITHIUM_CHECK_KEY)
+/* Subtract polynomial a from polynomial r in place. r -= a.
+ *
+ * @param  [in, out]  r  Polynomial to subtract from.
+ * @param  [in]       a  Polynomial to subtract.
+ */
+static void dilithium_sub(sword32* r, const sword32* a)
+{
+    word16 j;
+#ifdef WOLFSSL_DILITHIUM_SMALL
+    for (j = 0; j < DILITHIUM_N; j++) {
+        r[j] -= a[j];
+    }
+#else
+    for (j = 0; j < DILITHIUM_N; j += 8) {
+        r[j+0] -= a[j+0];
+        r[j+1] -= a[j+1];
+        r[j+2] -= a[j+2];
+        r[j+3] -= a[j+3];
+        r[j+4] -= a[j+4];
+        r[j+5] -= a[j+5];
+        r[j+6] -= a[j+6];
+        r[j+7] -= a[j+7];
+    }
+#endif
+}
+
+#if defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_VERIFY) && \
+     !defined(WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM)) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \
+     !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))
 /* Subtract vector a from r. r -= a.
  *
  * @param  [out]  r  Vector of polynomials that is result.
@@ -4702,31 +5049,45 @@ static void dilithium_vec_sub(sword32* r, const sword32* a, byte l)
     byte i;
 
     for (i = 0; i < l; i++) {
-        word16 j;
-#ifdef WOLFSSL_DILITHIUM_SMALL
-        for (j = 0; j < DILITHIUM_N; j++) {
-            r[j] -= a[j];
-        }
-#else
-        for (j = 0; j < DILITHIUM_N; j += 8) {
-            r[j+0] -= a[j+0];
-            r[j+1] -= a[j+1];
-            r[j+2] -= a[j+2];
-            r[j+3] -= a[j+3];
-            r[j+4] -= a[j+4];
-            r[j+5] -= a[j+5];
-            r[j+6] -= a[j+6];
-            r[j+7] -= a[j+7];
-        }
-#endif
+        dilithium_sub(r, a);
         r += DILITHIUM_N;
         a += DILITHIUM_N;
     }
 }
 #endif
+#endif
 
 #ifndef WOLFSSL_DILITHIUM_VERIFY_ONLY
 
+/* Add polynomial a to polynomial r in place. r += a.
+ *
+ * @param  [in, out]  r  Polynomial to add to.
+ * @param  [in]       a  Polynomial to add.
+ */
+static void dilithium_add(sword32* r, const sword32* a)
+{
+    word16 j;
+#ifdef WOLFSSL_DILITHIUM_SMALL
+    for (j = 0; j < DILITHIUM_N; j++) {
+        r[j] += a[j];
+    }
+#else
+    for (j = 0; j < DILITHIUM_N; j += 8) {
+        r[j+0] += a[j+0];
+        r[j+1] += a[j+1];
+        r[j+2] += a[j+2];
+        r[j+3] += a[j+3];
+        r[j+4] += a[j+4];
+        r[j+5] += a[j+5];
+        r[j+6] += a[j+6];
+        r[j+7] += a[j+7];
+    }
+#endif
+}
+#if !defined(WOLFSSL_DILITHIUM_NO_MAKE_KEY) || \
+     defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \
+     !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))
 /* Add vector a to r. r += a.
  *
  * @param  [out]  r  Vector of polynomials that is result.
@@ -4738,29 +5099,43 @@ static void dilithium_vec_add(sword32* r, const sword32* a, byte l)
     byte i;
 
     for (i = 0; i < l; i++) {
-        word16 j;
-#ifdef WOLFSSL_DILITHIUM_SMALL
-        for (j = 0; j < DILITHIUM_N; j++) {
-            r[j] += a[j];
-        }
-#else
-        for (j = 0; j < DILITHIUM_N; j += 8) {
-            r[j+0] += a[j+0];
-            r[j+1] += a[j+1];
-            r[j+2] += a[j+2];
-            r[j+3] += a[j+3];
-            r[j+4] += a[j+4];
-            r[j+5] += a[j+5];
-            r[j+6] += a[j+6];
-            r[j+7] += a[j+7];
-        }
-#endif
+        dilithium_add(r, a);
         r += DILITHIUM_N;
         a += DILITHIUM_N;
     }
 }
+#endif
 
-/* Make valus in polynomials of vector be in positive range.
+/* Make values in polynomial positive by adding q to each negative value.
+ *
+ * @param  [in, out]  a  Polynomial.
+ */
+static void dilithium_make_pos(sword32* a)
+{
+    word16 j;
+#ifdef WOLFSSL_DILITHIUM_SMALL
+    for (j = 0; j < DILITHIUM_N; j++) {
+        a[j] += (0 - (((word32)a[j]) >> 31)) & DILITHIUM_Q;
+    }
+#else
+    for (j = 0; j < DILITHIUM_N; j += 8) {
+        a[j+0] += (0 - (((word32)a[j+0]) >> 31)) & DILITHIUM_Q;
+        a[j+1] += (0 - (((word32)a[j+1]) >> 31)) & DILITHIUM_Q;
+        a[j+2] += (0 - (((word32)a[j+2]) >> 31)) & DILITHIUM_Q;
+        a[j+3] += (0 - (((word32)a[j+3]) >> 31)) & DILITHIUM_Q;
+        a[j+4] += (0 - (((word32)a[j+4]) >> 31)) & DILITHIUM_Q;
+        a[j+5] += (0 - (((word32)a[j+5]) >> 31)) & DILITHIUM_Q;
+        a[j+6] += (0 - (((word32)a[j+6]) >> 31)) & DILITHIUM_Q;
+        a[j+7] += (0 - (((word32)a[j+7]) >> 31)) & DILITHIUM_Q;
+    }
+#endif
+}
+
+#if !defined(WOLFSSL_DILITHIUM_NO_MAKE_KEY) || \
+     defined(WOLFSSL_DILITHIUM_CHECK_KEY) || \
+    (!defined(WOLFSSL_DILITHIUM_NO_SIGN) && \
+     !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM))
+/* Make values in polynomials of vector be in positive range.
  *
  * @param  [in, out]  a  Vector of polynomials.
  * @param  [in]       l  Dimension of vector.
@@ -4770,26 +5145,11 @@ static void dilithium_vec_make_pos(sword32* a, byte l) byte i; for (i = 0; i < l; i++) { - word16 j; -#ifdef WOLFSSL_DILITHIUM_SMALL - for (j = 0; j < DILITHIUM_N; j++) { - a[j] += (0 - (((word32)a[j]) >> 31)) & DILITHIUM_Q; - } -#else - for (j = 0; j < DILITHIUM_N; j += 8) { - a[j+0] += (0 - (((word32)a[j+0]) >> 31)) & DILITHIUM_Q; - a[j+1] += (0 - (((word32)a[j+1]) >> 31)) & DILITHIUM_Q; - a[j+2] += (0 - (((word32)a[j+2]) >> 31)) & DILITHIUM_Q; - a[j+3] += (0 - (((word32)a[j+3]) >> 31)) & DILITHIUM_Q; - a[j+4] += (0 - (((word32)a[j+4]) >> 31)) & DILITHIUM_Q; - a[j+5] += (0 - (((word32)a[j+5]) >> 31)) & DILITHIUM_Q; - a[j+6] += (0 - (((word32)a[j+6]) >> 31)) & DILITHIUM_Q; - a[j+7] += (0 - (((word32)a[j+7]) >> 31)) & DILITHIUM_Q; - } -#endif + dilithium_make_pos(a); a += DILITHIUM_N; } } +#endif #endif /* !WOLFSSL_DILITHIUM_VERIFY_ONLY */ @@ -5008,6 +5368,8 @@ static int dilithium_make_key(dilithium_key* key, WC_RNG* rng) #ifndef WOLFSSL_DILITHIUM_NO_SIGN +#if !defined(WOLFSSL_DILITHIUM_SIGN_SMALL_MEM) || \ + defined(WC_DILITHIUM_CACHE_PRIV_VECTORS) /* Decode, from private key, and NTT private key vectors s1, s2, and t0. * * FIPS 204. 6: Algorithm 2 MD-DSA.Sign(sk, M) @@ -5049,6 +5411,7 @@ static void dilithium_make_priv_vecs(dilithium_key* key, sword32* s1, key->privVecsSet = 1; #endif } +#endif /* Sign a message with the key and a seed. * @@ -5105,6 +5468,7 @@ static void dilithium_make_priv_vecs(dilithium_key* key, sword32* s1, static int dilithium_sign_msg_with_seed(dilithium_key* key, const byte* seed, const byte* msg, word32 msgLen, byte* sig, word32 *sigLen) { +#ifndef WOLFSSL_DILITHIUM_SIGN_SMALL_MEM int ret = 0; const wc_dilithium_params* params = key->params; byte* pub_seed = key->k; @@ -5249,10 +5613,10 @@ static int dilithium_sign_msg_with_seed(dilithium_key* key, const byte* seed, byte* commit = sig; /* Step 12: Compute vector y from private random seed and kappa. 
*/ - dilithium_expand_mask(&key->shake, priv_rand_seed, kappa, + dilithium_vec_expand_mask(&key->shake, priv_rand_seed, kappa, params->gamma1_bits, y, params->l); #ifdef WOLFSSL_DILITHIUM_SIGN_CHECK_Y - valid = dilithium_check_low(y, params->l, + valid = dilithium_vec_check_low(y, params->l, (1 << params->gamma1_bits) - params->beta); if (valid) #endif @@ -5266,7 +5630,7 @@ static int dilithium_sign_msg_with_seed(dilithium_key* key, const byte* seed, dilithium_vec_make_pos(w, params->k); dilithium_vec_decompose(w, params->k, params->gamma2, w0, w1); #ifdef WOLFSSL_DILITHIUM_SIGN_CHECK_W0 - valid = dilithium_check_low(w0, params->k, + valid = dilithium_vec_check_low(w0, params->k, params->gamma2 - params->beta); } if (valid) { @@ -5295,7 +5659,7 @@ static int dilithium_sign_msg_with_seed(dilithium_key* key, const byte* seed, dilithium_vec_red(w0, params->k); /* Step 23: Check w0 - cs2 has low enough values. */ hi = params->gamma2 - params->beta; - valid = dilithium_check_low(w0, params->k, hi); + valid = dilithium_vec_check_low(w0, params->k, hi); if (valid) { /* Step 19: cs1 = NTT-1(c o s1) */ dilithium_vec_mul(z, c, s1, params->l); @@ -5305,7 +5669,7 @@ static int dilithium_sign_msg_with_seed(dilithium_key* key, const byte* seed, dilithium_vec_red(z, params->l); /* Step 23: Check z has low enough values. */ hi = (1 << params->gamma1_bits) - params->beta; - valid = dilithium_check_low(z, params->l, hi); + valid = dilithium_vec_check_low(z, params->l, hi); } if (valid) { /* Step 25: ct0 = NTT-1(c o t0) */ @@ -5313,12 +5677,12 @@ static int dilithium_sign_msg_with_seed(dilithium_key* key, const byte* seed, dilithium_vec_invntt(ct0, params->k); /* Step 27: Check ct0 has low enough values. 
*/
                     hi = params->gamma2;
-                    valid = dilithium_check_low(ct0, params->k, hi);
+                    valid = dilithium_vec_check_low(ct0, params->k, hi);
                 }
                 if (valid) {
                     /* Step 26: ct0 = ct0 + w0 */
                     dilithium_vec_add(ct0, w0, params->k);
-                    dilithium_vec_red(ct0, params->l);
+                    dilithium_vec_red(ct0, params->k);
                     /* Step 26, 27: Make hint from ct0 and w1 and check
                      * number of hints is valid.
                      * Step 32: h is encoded into signature.
@@ -5352,6 +5716,346 @@ static int dilithium_sign_msg_with_seed(dilithium_key* key, const byte* seed,
 
     XFREE(y, NULL, DYNAMIC_TYPE_DILITHIUM);
     return ret;
+#else
+    int ret = 0;
+    const wc_dilithium_params* params = key->params;
+    byte* pub_seed = key->k;
+    byte* k = pub_seed + DILITHIUM_PUB_SEED_SZ;
+    byte* tr = k + DILITHIUM_K_SZ;
+    const byte* s1p = tr + DILITHIUM_TR_SZ;
+    const byte* s2p = s1p + params->s1EncSz;
+    const byte* t0p = s2p + params->s2EncSz;
+    sword32* a = NULL;
+    sword32* s1 = NULL;
+    sword32* s2 = NULL;
+    sword32* t0 = NULL;
+    sword32* y = NULL;
+    sword32* y_ntt = NULL;
+    sword32* w0 = NULL;
+    sword32* w1 = NULL;
+    sword32* c = NULL;
+    sword32* z = NULL;
+    sword32* ct0 = NULL;
+    byte data[DILITHIUM_RND_SZ + DILITHIUM_MU_SZ];
+    byte* mu = data + DILITHIUM_RND_SZ;
+    byte priv_rand_seed[DILITHIUM_Y_SEED_SZ];
+    byte* h = sig + params->lambda * 2 + params->zEncSz;
+
+    /* Check the signature buffer isn't too small. */
+    if ((ret == 0) && (*sigLen < params->sigSz)) {
+        ret = BUFFER_E;
+    }
+    if (ret == 0) {
+        /* Return the size of the signature. */
+        *sigLen = params->sigSz;
+    }
+
+    /* Allocate one buffer that holds all the large intermediates. */
+    if (ret == 0) {
+        unsigned int allocSz;
+
+        /* Polynomials: y-l, w0-k, w1-k, c-1, s1 (shared scratch)-1, A-1 */
+        allocSz = params->s1Sz + params->s2Sz + params->s2Sz +
+            DILITHIUM_POLY_SIZE + DILITHIUM_POLY_SIZE + DILITHIUM_POLY_SIZE;
+        y = (sword32*)XMALLOC(allocSz, NULL, DYNAMIC_TYPE_DILITHIUM);
+        if (y == NULL) {
+            ret = MEMORY_E;
+        }
+        else {
+            w0 = y + params->s1Sz / sizeof(*y);
+            w1 = w0 + params->s2Sz / sizeof(*w0);
+            c = w1 + params->s2Sz / sizeof(*w1);
+            s1 = c + DILITHIUM_N;
+            a = s1 + DILITHIUM_N;
+            s2 = s1; /* s2, t0, ct0, z, y_ntt all reuse s1's polynomial. */
+            t0 = s1;
+            ct0 = s1;
+            z = s1;
+            y_ntt = s1;
+        }
+    }
+
+    if (ret == 0) {
+        /* Step 7: Copy random into buffer for hashing. */
+        XMEMCPY(data, seed, DILITHIUM_RND_SZ);
+
+        /* Step 6: Compute the hash of tr, public key hash, and message. */
+        ret = dilithium_hash256(&key->shake, tr, DILITHIUM_TR_SZ, msg, msgLen,
+            mu, DILITHIUM_MU_SZ);
+    }
+    if (ret == 0) {
+        /* Step 9: Compute private random using hash. */
+        ret = dilithium_hash256(&key->shake, k, DILITHIUM_K_SZ, data,
+            DILITHIUM_RND_SZ + DILITHIUM_MU_SZ, priv_rand_seed,
+            DILITHIUM_PRIV_RAND_SEED_SZ);
+    }
+    if (ret == 0) {
+        word16 kappa = 0;
+        int valid;
+
+        /* Step 11: Start rejection sampling loop */
+        do {
+            byte w1e[DILITHIUM_MAX_W1_ENC_SZ];
+            sword32* w = w1; /* w is decomposed in place into w1. */
+            byte* commit = sig;
+            byte r;
+            byte s;
+            byte aseed[DILITHIUM_GEN_A_SEED_SZ];
+            sword32 hi;
+            sword32* at = a;
+            sword32* wt = w;
+            sword32* w0t = w0;
+            sword32* w1t = w1;
+
+            valid = 1;
+            /* Step 12: Compute vector y from private random seed and kappa. */
+            dilithium_vec_expand_mask(&key->shake, priv_rand_seed, kappa,
+                params->gamma1_bits, y, params->l);
+        #ifdef WOLFSSL_DILITHIUM_SIGN_CHECK_Y
+            valid = dilithium_vec_check_low(y, params->l,
+                (1 << params->gamma1_bits) - params->beta);
+        #endif
+
+            /* Step 5: Create the matrix A from the public seed. */
+            /* Copy the seed into a buffer that has space for s and r. */
+            XMEMCPY(aseed, pub_seed, DILITHIUM_PUB_SEED_SZ);
+            /* Alg 26. Step 1: Loop over first dimension of matrix. */
+            for (r = 0; (ret == 0) && valid && (r < params->k); r++) {
+                unsigned int e;
+                sword32* yt = y;
+
+                /* Put r/i into buffer to be hashed. */
+                aseed[DILITHIUM_PUB_SEED_SZ + 1] = r;
+                /* Alg 26. Step 2: Loop over second dimension of matrix. */
+                for (s = 0; (ret == 0) && (s < params->l); s++) {
+                    /* Put s into buffer to be hashed. */
+                    aseed[DILITHIUM_PUB_SEED_SZ + 0] = s;
+                    /* Alg 26. Step 3: Create polynomial from hashing seed. */
+                    ret = dilithium_rej_ntt_poly(&key->shake, aseed, at,
+                        NULL);
+                    if (ret != 0) {
+                        break;
+                    }
+                    XMEMCPY(y_ntt, yt, DILITHIUM_POLY_SIZE);
+                    dilithium_ntt(y_ntt);
+                    /* Matrix multiply: accumulate A[r][s] o NTT(y[s]). */
+                    if (s == 0) {
+                        for (e = 0; e < DILITHIUM_N; e++) {
+                            wt[e] = dilithium_mont_red((sword64)at[e] *
+                                y_ntt[e]);
+                        }
+                    }
+                    else {
+                        for (e = 0; e < DILITHIUM_N; e++) {
+                            wt[e] += dilithium_mont_red((sword64)at[e] *
+                                y_ntt[e]);
+                        }
+                    }
+                    /* Next polynomial. */
+                    yt += DILITHIUM_N;
+                }
+                dilithium_invntt(wt); /* w[r] = NTT-1(A[r] o NTT(y)) */
+                /* Step 14, Step 22: Make values positive and decompose. */
+                dilithium_make_pos(wt);
+            #ifndef WOLFSSL_NO_ML_DSA_44
+                if (params->gamma2 == DILITHIUM_Q_LOW_88) {
+                    /* For each value of polynomial. */
+                    for (e = 0; e < DILITHIUM_N; e++) {
+                        /* Decompose value into two vectors. */
+                        dilithium_decompose_q88(wt[e], &w0t[e], &w1t[e]);
+                    }
+                }
+            #endif
+            #if !defined(WOLFSSL_NO_ML_DSA_65) || !defined(WOLFSSL_NO_ML_DSA_87)
+                if (params->gamma2 == DILITHIUM_Q_LOW_32) {
+                    /* For each value of polynomial. */
+                    for (e = 0; e < DILITHIUM_N; e++) {
+                        /* Decompose value into two vectors. */
+                        dilithium_decompose_q32(wt[e], &w0t[e], &w1t[e]);
+                    }
+                }
+            #endif
+            #ifdef WOLFSSL_DILITHIUM_SIGN_CHECK_W0
+                valid = dilithium_check_low(w0t,
+                    params->gamma2 - params->beta);
+            #endif
+                wt  += DILITHIUM_N;
+                w0t += DILITHIUM_N;
+                w1t += DILITHIUM_N;
+            }
+            if ((ret == 0) && valid) {
+                sword32* yt = y;
+                const byte* s1pt = s1p;
+                byte* ze = sig + params->lambda * 2;
+
+                /* Step 15: Encode w1. */
+                dilithium_vec_encode_w1(w1, params->k, params->gamma2, w1e);
+                /* Step 15: Hash mu and encoded w1.
+                 * Step 32: Hash is stored in signature. */
+                ret = dilithium_hash256(&key->shake, mu, DILITHIUM_MU_SZ,
+                    w1e, params->w1EncSz, commit, 2 * params->lambda);
+                if (ret == 0) {
+                    /* Step 17: Compute c from first 256 bits of commit. */
+                    ret = dilithium_sample_in_ball(&key->shake, commit,
+                        params->tau, c, NULL);
+                }
+                if (ret == 0) {
+                    /* Step 18: NTT(c). */
+                    dilithium_ntt_small(c);
+                }
+
+                for (s = 0; (ret == 0) && valid && (s < params->l); s++) {
+            #if !defined(WOLFSSL_NO_ML_DSA_44) || \
+                !defined(WOLFSSL_NO_ML_DSA_87)
+                    /* -2..2 */
+                    if (params->eta == DILITHIUM_ETA_2) {
+                        dilithium_decode_eta_2_bits(s1pt, s1);
+                        s1pt += DILITHIUM_ETA_2_BITS * DILITHIUM_N / 8;
+                    }
+            #endif
+            #ifndef WOLFSSL_NO_ML_DSA_65
+                    /* -4..4 */
+                    if (params->eta == DILITHIUM_ETA_4) {
+                        dilithium_decode_eta_4_bits(s1pt, s1);
+                        s1pt += DILITHIUM_N / 2;
+                    }
+            #endif
+                    dilithium_ntt_small(s1);
+                    dilithium_mul(z, c, s1);
+                    /* Step 19: cs1 = NTT-1(c o s1) */
+                    dilithium_invntt(z);
+                    /* Step 21: z = y + cs1 */
+                    dilithium_add(z, yt);
+                    dilithium_poly_red(z);
+                    /* Step 23: Check z has low enough values. */
+                    hi = (1 << params->gamma1_bits) - params->beta;
+                    valid = dilithium_check_low(z, hi);
+                    if (valid) {
+                        /* Step 32: Encode z into signature.
+                         * Commit (c) and h already encoded into signature. */
+                    #if !defined(WOLFSSL_NO_ML_DSA_44)
+                        if (params->gamma1_bits == DILITHIUM_GAMMA1_BITS_17) {
+                            dilithium_encode_gamma1_17_bits(z, ze);
+                            /* Move to next place to encode to. */
+                            ze += DILITHIUM_GAMMA1_17_ENC_BITS / 2 *
+                                  DILITHIUM_N / 4;
+                        }
+                        else
+                    #endif
+                    #if !defined(WOLFSSL_NO_ML_DSA_65) || \
+                        !defined(WOLFSSL_NO_ML_DSA_87)
+                        if (params->gamma1_bits == DILITHIUM_GAMMA1_BITS_19) {
+                            dilithium_encode_gamma1_19_bits(z, ze);
+                            /* Move to next place to encode to. */
+                            ze += DILITHIUM_GAMMA1_19_ENC_BITS / 2 *
+                                  DILITHIUM_N / 4;
+                        }
+                    #endif
+                    }
+
+                    yt += DILITHIUM_N;
+                }
+            }
+            if ((ret == 0) && valid) {
+                const byte* t0pt = t0p;
+                const byte* s2pt = s2p;
+                sword32* cs2 = ct0;
+                byte idx = 0;
+
+                w0t = w0;
+                w1t = w1;
+                for (r = 0; valid && (r < params->k); r++) {
+            #if !defined(WOLFSSL_NO_ML_DSA_44) || \
+                !defined(WOLFSSL_NO_ML_DSA_87)
+                    /* -2..2 */
+                    if (params->eta == DILITHIUM_ETA_2) {
+                        dilithium_decode_eta_2_bits(s2pt, s2);
+                        s2pt += DILITHIUM_ETA_2_BITS * DILITHIUM_N / 8;
+                    }
+            #endif
+            #ifndef WOLFSSL_NO_ML_DSA_65
+                    /* -4..4 */
+                    if (params->eta == DILITHIUM_ETA_4) {
+                        dilithium_decode_eta_4_bits(s2pt, s2);
+                        s2pt += DILITHIUM_N / 2;
+                    }
+            #endif
+                    dilithium_ntt_small(s2);
+                    /* Step 20: cs2 = NTT-1(c o s2) */
+                    dilithium_mul(cs2, c, s2);
+                    dilithium_invntt(cs2);
+                    /* Step 22: w0 - cs2 */
+                    dilithium_sub(w0t, cs2);
+                    dilithium_poly_red(w0t);
+                    /* Step 23: Check w0 - cs2 has low enough values. */
+                    hi = params->gamma2 - params->beta;
+                    valid = dilithium_check_low(w0t, hi);
+                    if (valid) {
+                        dilithium_decode_t0(t0pt, t0);
+                        dilithium_ntt(t0);
+
+                        /* Step 25: ct0 = NTT-1(c o t0) */
+                        dilithium_mul(ct0, c, t0);
+                        dilithium_invntt(ct0);
+                        /* Step 27: Check ct0 has low enough values. */
+                        valid = dilithium_check_low(ct0, params->gamma2);
+                    }
+                    if (valid) {
+                        /* Step 26: ct0 = ct0 + w0 */
+                        dilithium_add(ct0, w0t);
+                        dilithium_poly_red(ct0);
+
+                        /* Step 26, 27: Make hint from ct0 and w1 and check
+                         * number of hints is valid.
+                         * Step 32: h is encoded into signature.
+                         */
+                    #ifndef WOLFSSL_NO_ML_DSA_44
+                        if (params->gamma2 == DILITHIUM_Q_LOW_88) {
+                            valid = (dilithium_make_hint_88(ct0, w1t, h,
+                                &idx) == 0);
+                            /* Alg 14, Step 10: Store count of hints for
+                             * polynomial at end of list. */
+                            h[PARAMS_ML_DSA_44_OMEGA + r] = idx;
+                        }
+                    #endif
+                    #if !defined(WOLFSSL_NO_ML_DSA_65) || \
+                        !defined(WOLFSSL_NO_ML_DSA_87)
+                        if (params->gamma2 == DILITHIUM_Q_LOW_32) {
+                            valid = (dilithium_make_hint_32(ct0, w1t,
+                                params->omega, h, &idx) == 0);
+                            /* Alg 14, Step 10: Store count of hints for
+                             * polynomial at end of list. */
+                            h[params->omega + r] = idx;
+                        }
+                    #endif
+                    }
+
+                    t0pt += DILITHIUM_D * DILITHIUM_N / 8;
+                    w0t += DILITHIUM_N;
+                    w1t += DILITHIUM_N;
+                }
+                /* Set remaining hints to zero. */
+                XMEMSET(h + idx, 0, params->omega - idx);
+            }
+
+            if (!valid) {
+                /* Too many attempts - something wrong with implementation. */
+                if ((kappa > (word16)(kappa + params->l))) {
+                    ret = BAD_COND_E;
+                }
+
+                /* Step 30: Increment value appended to seed so next y differs.
+                 */
+                kappa += params->l;
+            }
+        }
+        /* Step 11: Check we have a valid signature. */
+        while ((ret == 0) && (!valid));
+    }
+
+    XFREE(y, NULL, DYNAMIC_TYPE_DILITHIUM);
+    return ret;
+#endif
 }
 
 /* Sign a message with the key and a random number generator.
@@ -5447,10 +6151,10 @@ static void dilithium_make_pub_vec(dilithium_key* key, sword32* t1)
  * @return  MEMORY_E when memory allocation fails.
  * @return  Other negative when an error occurs.
  */
-#ifndef WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM
 static int dilithium_verify_msg(dilithium_key* key, const byte* msg,
     word32 msgLen, const byte* sig, word32 sigLen, int* res)
 {
+#ifndef WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM
     int ret = 0;
     const wc_dilithium_params* params = key->params;
     const byte* pub_seed = key->p;
@@ -5538,7 +6242,7 @@ static int dilithium_verify_msg(dilithium_key* key, const byte* msg,
         dilithium_vec_decode_gamma1(ze, params->l, params->gamma1_bits, z);
         /* Step 13: Check z is valid - values are low enough.
*/ hi = (1 << params->gamma1_bits) - params->beta; - valid = dilithium_check_low(z, params->l, hi); + valid = dilithium_vec_check_low(z, params->l, hi); } if ((ret == 0) && valid) { #ifdef WC_DILITHIUM_CACHE_PUB_VECTORS @@ -5603,11 +6307,7 @@ static int dilithium_verify_msg(dilithium_key* key, const byte* msg, *res = valid; XFREE(z, NULL, DYNAMIC_TYPE_DILITHIUM); return ret; -} #else -static int dilithium_verify_msg(dilithium_key* key, const byte* msg, - word32 msgLen, const byte* sig, word32 sigLen, int* res) -{ int ret = 0; const wc_dilithium_params* params = key->params; const byte* pub_seed = key->p; @@ -5674,7 +6374,7 @@ static int dilithium_verify_msg(dilithium_key* key, const byte* msg, dilithium_vec_decode_gamma1(ze, params->l, params->gamma1_bits, z); /* Step 13: Check z is valid - values are low enough. */ hi = (1 << params->gamma1_bits) - params->beta; - valid = dilithium_check_low(z, params->l, hi); + valid = dilithium_vec_check_low(z, params->l, hi); } if ((ret == 0) && valid) { /* Step 10: NTT(z) */ @@ -5812,8 +6512,8 @@ static int dilithium_verify_msg(dilithium_key* key, const byte* msg, XFREE(z, NULL, DYNAMIC_TYPE_DILITHIUM); #endif return ret; -} #endif /* !WOLFSSL_DILITHIUM_VERIFY_SMALL_MEM */ +} #endif /* WOLFSSL_DILITHIUM_NO_VERIFY */