From 71325a2a32278805c43d8fe5fa99d9d3cf3fa15d Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 13 Dec 2024 13:52:53 -0800 Subject: [PATCH] Revert "Aarch64: make code compile when no hardware crypto avail" --- .wolfssl_known_macro_extras | 2 - configure.ac | 11 +- wolfcrypt/benchmark/benchmark.c | 46 - wolfcrypt/src/aes.c | 193 +-- wolfcrypt/src/cpuid.c | 205 +-- wolfcrypt/src/port/arm/armv8-aes.c | 1898 ++++++++++++------------- wolfcrypt/src/port/arm/armv8-sha256.c | 209 +-- wolfssl/wolfcrypt/aes.h | 62 +- wolfssl/wolfcrypt/cpuid.h | 25 - 9 files changed, 906 insertions(+), 1745 deletions(-) diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 9c32e038da..d0d1aadfbb 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -546,7 +546,6 @@ WOLFCRYPT_FIPS_CORE_DYNAMIC_HASH_VALUE WOLFSENTRY_H WOLFSENTRY_NO_JSON WOLFSSL_32BIT_MILLI_TIME -WOLFSSL_AARCH64_PRIVILEGE_MODE WOLFSSL_AESNI_BY4 WOLFSSL_AESNI_BY6 WOLFSSL_AFTER_DATE_CLOCK_SKEW @@ -905,7 +904,6 @@ __MINGW32__ __MINGW64_VERSION_MAJOR __MINGW64__ __MWERKS__ -__OpenBSD__ __PIE__ __POWERPC__ __PPC__ diff --git a/configure.ac b/configure.ac index 3186640901..3808f37317 100644 --- a/configure.ac +++ b/configure.ac @@ -2974,7 +2974,6 @@ then fi -ENABLED_ARMASM_CRYPTO="unknown" ENABLED_ARMASM_INLINE="no" ENABLED_ARMASM_SHA3="no" ENABLED_ARMASM_CRYPTO_SM4="no" @@ -2996,9 +2995,6 @@ then inline) ENABLED_ARMASM_INLINE=yes ;; - no-crypto) - ENABLED_ARMASM_CRYPTO=no - ;; sha512-crypto | sha3-crypto) case $host_cpu in *aarch64*) @@ -3074,9 +3070,7 @@ then esac # Include options.h AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN" - if test "$ENABLED_ARMASM_CRYPTO" = "unknown"; then - ENABLED_ARMASM_CRYPTO=yes - fi + ENABLED_ARMASM_CRYPTO=yes ENABLED_ARMASM_NEON=yes ENABLED_ARM_64=yes @@ -3177,9 +3171,6 @@ fi if test "$ENABLED_ARMASM_SM4" = "yes"; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ARMASM_CRYPTO_SM4" fi -if test "$ENABLED_ARMASM_CRYPTO" = "unknown"; then - ENABLED_ARMASM_CRYPTO=no -fi if test "$ENABLED_ARMASM_CRYPTO" = "no"; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ARMASM_NO_HW_CRYPTO" fi diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index 28e8e9e5f1..6ff2db5f0f 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -229,8 +229,6 @@ #include #endif -#include - #ifdef USE_FLAT_BENCHMARK_H #include "benchmark.h" #else @@ -3941,46 +3939,6 @@ static void* benchmarks_do(void* args) return NULL; } -#if defined(HAVE_CPUID) && defined(WOLFSSL_TEST_STATIC_BUILD) -static void print_cpu_features(void) -{ - word32 cpuid_flags = cpuid_get_flags(); - - printf("CPU: "); -#ifdef HAVE_CPUID_INTEL - printf("Intel"); -#ifdef WOLFSSL_X86_64_BUILD - printf(" x86_64"); -#else - printf(" x86"); -#endif - printf(" -"); - if (IS_INTEL_AVX1(cpuid_flags)) printf(" avx1"); - if (IS_INTEL_AVX2(cpuid_flags)) printf(" avx2"); - if (IS_INTEL_RDRAND(cpuid_flags)) printf(" rdrand"); - if (IS_INTEL_RDSEED(cpuid_flags)) printf(" rdseed"); - if (IS_INTEL_BMI2(cpuid_flags)) printf(" bmi2"); - if (IS_INTEL_AESNI(cpuid_flags)) printf(" aesni"); - if (IS_INTEL_ADX(cpuid_flags)) printf(" adx"); - if (IS_INTEL_MOVBE(cpuid_flags)) printf(" movbe"); - if (IS_INTEL_BMI1(cpuid_flags)) printf(" bmi1"); - if (IS_INTEL_SHA(cpuid_flags)) printf(" sha"); -#endif -#ifdef __aarch64__ - printf("Aarch64 -"); - if (IS_AARCH64_AES(cpuid_flags)) printf(" aes"); - if (IS_AARCH64_PMULL(cpuid_flags)) printf(" pmull"); - if (IS_AARCH64_SHA256(cpuid_flags)) printf(" sha256"); - if 
(IS_AARCH64_SHA512(cpuid_flags)) printf(" sha512"); - if (IS_AARCH64_RDM(cpuid_flags)) printf(" rdm"); - if (IS_AARCH64_SHA3(cpuid_flags)) printf(" sha3"); - if (IS_AARCH64_SM3(cpuid_flags)) printf(" sm3"); - if (IS_AARCH64_SM4(cpuid_flags)) printf(" sm4"); -#endif - printf("\n"); -} -#endif - int benchmark_init(void) { int ret = 0; @@ -4001,10 +3959,6 @@ int benchmark_init(void) return EXIT_FAILURE; } -#if defined(HAVE_CPUID) && defined(WOLFSSL_TEST_STATIC_BUILD) - print_cpu_features(); -#endif - #ifdef HAVE_WC_INTROSPECTION printf("Math: %s\n", wc_GetMathInfo()); #endif diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index a8b7651835..fa57d7685a 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -106,7 +106,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #include #endif -#if !defined(WOLFSSL_RISCV_ASM) +#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM) #ifdef WOLFSSL_IMX6_CAAM_BLOB /* case of possibly not using hardware acceleration for AES but using key @@ -787,26 +787,6 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits } #endif /* HAVE_AES_DECRYPT */ -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - - #define NEED_AES_TABLES - - static int checkedCpuIdFlags = 0; - static word32 cpuid_flags = 0; - - static void Check_CPU_support_HwCrypto(Aes* aes) - { - if (checkedCpuIdFlags == 0) { - cpuid_flags = cpuid_get_flags(); - checkedCpuIdFlags = 1; - } - aes->use_aes_hw_crypto = IS_AARCH64_AES(cpuid_flags); - #ifdef HAVE_AESGCM - aes->use_pmull_hw_crypto = IS_AARCH64_PMULL(cpuid_flags); - #endif - } - #elif (defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) \ && !defined(WOLFSSL_QNX_CAAM)) || \ ((defined(WOLFSSL_AFALG) || defined(WOLFSSL_DEVCRYPTO_AES)) && \ @@ -2895,13 +2875,6 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt( printf("Skipping AES-NI\n"); #endif } -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_encrypt_AARCH64(inBlock, outBlock, (byte*)aes->key, - (int)aes->rounds); - return 0; - } #endif /* WOLFSSL_AESNI */ #if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) AES_ECB_encrypt(aes, inBlock, outBlock, WC_AES_BLOCK_SIZE); @@ -3657,13 +3630,6 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( printf("Skipping AES-NI\n"); #endif } -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_decrypt_AARCH64(inBlock, outBlock, (byte*)aes->key, - (int)aes->rounds); - return 0; - } #endif /* WOLFSSL_AESNI */ #if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) return AES_ECB_decrypt(aes, inBlock, outBlock, WC_AES_BLOCK_SIZE); @@ -4614,14 +4580,6 @@ static void AesSetKey_C(Aes* aes, const byte* key, word32 keySz, int dir) } #endif /* WOLFSSL_AESNI */ - #if defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - Check_CPU_support_HwCrypto(aes); - if (aes->use_aes_hw_crypto) { - return AES_set_key_AARCH64(userKey, keylen, aes, dir); - } - #endif - #ifdef WOLFSSL_KCAPI_AES XMEMCPY(aes->devKey, userKey, keylen); if (aes->init != 0) { @@ -5817,14 +5775,6 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) } } else - #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_CBC_encrypt_AARCH64(in, out, sz, (byte*)aes->reg, - 
(byte*)aes->key, (int)aes->rounds); - ret = 0; - } - else #endif { ret = 0; @@ -5965,14 +5915,6 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) ret = 0; } else - #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_CBC_decrypt_AARCH64(in, out, sz, (byte*)aes->reg, - (byte*)aes->key, (int)aes->rounds); - ret = 0; - } - else #endif { ret = 0; @@ -6311,14 +6253,6 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) aes->left -= processed; sz -= processed; - #if defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_CTR_encrypt_AARCH64(aes, out, in, sz); - return 0; - } - #endif - VECTOR_REGISTERS_PUSH; #if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT) && \ @@ -6407,7 +6341,7 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) #endif /* NEED_AES_CTR_SOFT */ #endif /* WOLFSSL_AES_COUNTER */ -#endif /* !WOLFSSL_RISCV_ASM */ +#endif /* !WOLFSSL_ARMASM && ! WOLFSSL_RISCV_ASM */ /* @@ -6454,7 +6388,10 @@ static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz) #endif -#if defined(WOLFSSL_RISCV_ASM) +#ifdef WOLFSSL_ARMASM + /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */ + +#elif defined(WOLFSSL_RISCV_ASM) /* implemented in wolfcrypt/src/port/risc-v/riscv-64-aes.c */ #elif defined(WOLFSSL_AFALG) @@ -6664,13 +6601,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) return ret; #endif /* WOLFSSL_RENESAS_RSIP && WOLFSSL_RENESAS_FSPSM_CRYPTONLY*/ -#if defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (ret == 0 && aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) { - AES_GCM_set_key_AARCH64(aes, iv); - } - else -#endif #if !defined(FREESCALE_LTC_AES_GCM) if (ret == 0) { VECTOR_REGISTERS_PUSH; @@ -7388,8 +7318,6 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, */ #define GHASH_INIT_EXTRA(aes) WC_DO_NOTHING -#if !defined(__aarch64__) || !defined(WOLFSSL_ARMASM) || \ - defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) /* GHASH one block of data.. * * XOR block into tag and GMULT with H using pre-computed table. @@ -7403,7 +7331,6 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, GMULT(AES_TAG(aes), (aes)->gcm.M0); \ } \ while (0) -#endif #endif /* WOLFSSL_AESGCM_STREAM */ #elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) @@ -7999,17 +7926,8 @@ static void GHASH_INIT(Aes* aes) { /* Reset counts of AAD and cipher text. */ aes->aOver = 0; aes->cOver = 0; -#if defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) { - ; /* Don't do extra initialization. */ - } - else -#endif - { - /* Extra initialization based on implementation. */ - GHASH_INIT_EXTRA(aes); - } + /* Extra initialization based on implementation. */ + GHASH_INIT_EXTRA(aes); } /* Update the GHASH with AAD and/or cipher text. 
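Review note on the GHASH hunks above: with this revert, GHASH_ONE_BLOCK is again defined unconditionally, and in both the old and new code the software path hashes a block by XORing it into the running tag and then multiplying the tag by H. Below is a minimal, self-contained sketch of that step, using the textbook bit-serial multiply from NIST SP 800-38D rather than wolfSSL's precomputed-table GMULT; the names gmult_bitserial and ghash_one_block are illustrative, not wolfSSL API.

    #include <stdint.h>
    #include <string.h>

    /* Bit-serial GF(2^128) multiply from NIST SP 800-38D, Algorithm 1:
     * X <- X * H. GCM's bit ordering: bit 0 is the MSB of byte 0, and
     * the reduction polynomial is R = 0xE1 || 0^120. */
    static void gmult_bitserial(uint8_t X[16], const uint8_t H[16])
    {
        uint8_t Z[16] = {0};          /* running product */
        uint8_t V[16];                /* shifted copy of H */

        memcpy(V, H, 16);
        for (int i = 0; i < 128; i++) {
            if (X[i / 8] & (0x80u >> (i % 8))) {  /* bit i of X set? */
                for (int k = 0; k < 16; k++)
                    Z[k] ^= V[k];                 /* Z ^= V */
            }
            int carry = V[15] & 1;                /* V >>= 1 ... */
            for (int k = 15; k > 0; k--)
                V[k] = (uint8_t)((V[k] >> 1) | (V[k - 1] << 7));
            V[0] >>= 1;
            if (carry)
                V[0] ^= 0xE1;                     /* ... reduce by R */
        }
        memcpy(X, Z, 16);
    }

    /* The step GHASH_ONE_BLOCK performs above: fold one 16-byte block
     * of AAD or ciphertext into the running tag, then multiply by H. */
    static void ghash_one_block(uint8_t tag[16], const uint8_t block[16],
                                const uint8_t H[16])
    {
        for (int k = 0; k < 16; k++)
            tag[k] ^= block[k];
        gmult_bitserial(tag, H);
    }

wolfSSL's own GMULT replaces the inner bit loop with the precomputed M0 table (software) or PMULL instructions (hardware); the XOR-then-multiply structure per block is the same.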
@@ -8670,14 +8588,6 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, } } else -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) { - AES_GCM_encrypt_AARCH64(aes, out, in, sz, iv, ivSz, authTag, authTagSz, - authIn, authInSz); - ret = 0; - } - else #endif /* WOLFSSL_AESNI */ { ret = AES_GCM_encrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz, @@ -9262,13 +9172,6 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, } } else -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) { - ret = AES_GCM_decrypt_AARCH64(aes, out, in, sz, iv, ivSz, authTag, - authTagSz, authIn, authInSz); - } - else #endif /* WOLFSSL_AESNI */ { ret = AES_GCM_decrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz, @@ -10183,20 +10086,7 @@ int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv, RESTORE_VECTOR_REGISTERS(); } else - #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_GCM_init_AARCH64(aes, iv, ivSz); - - /* Reset state fields. */ - aes->over = 0; - aes->aSz = 0; - aes->cSz = 0; - /* Initialization for GHASH. */ - GHASH_INIT(aes); - } - else - #endif /* WOLFSSL_AESNI */ + #endif { ret = AesGcmInit_C(aes, iv, ivSz); } @@ -10322,13 +10212,6 @@ int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, RESTORE_VECTOR_REGISTERS(); } else - #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_GCM_crypt_update_AARCH64(aes, out, in, sz); - GHASH_UPDATE_AARCH64(aes, authIn, authInSz, out, sz); - } - else #endif { /* Encrypt the plaintext. */ @@ -10382,12 +10265,6 @@ int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz) RESTORE_VECTOR_REGISTERS(); } else - #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_GCM_final_AARCH64(aes, authTag, authTagSz); - } - else #endif { ret = AesGcmFinal_C(aes, authTag, authTagSz); @@ -10471,13 +10348,6 @@ int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, RESTORE_VECTOR_REGISTERS(); } else - #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - GHASH_UPDATE_AARCH64(aes, authIn, authInSz, in, sz); - AES_GCM_crypt_update_AARCH64(aes, out, in, sz); - } - else #endif { /* Update the authentication tag with any authentication data and @@ -10529,17 +10399,6 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz) RESTORE_VECTOR_REGISTERS(); } else - #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - ALIGN32 byte calcTag[WC_AES_BLOCK_SIZE]; - AES_GCM_final_AARCH64(aes, calcTag, authTagSz); - /* Check calculated tag matches the one passed in. 
*/ - if (ConstantCompare(authTag, calcTag, (int)authTagSz) != 0) { - ret = AES_GCM_AUTH_E; - } - } - else #endif { ALIGN32 byte calcTag[WC_AES_BLOCK_SIZE]; @@ -10816,7 +10675,10 @@ int wc_AesCcmCheckTagSize(int sz) return 0; } -#if defined(WOLFSSL_RISCV_ASM) +#ifdef WOLFSSL_ARMASM + /* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */ + +#elif defined(WOLFSSL_RISCV_ASM) /* implementation located in wolfcrypt/src/port/risc-v/riscv-64-aes.c */ #elif defined(HAVE_COLDFIRE_SEC) @@ -11822,12 +11684,6 @@ static WARN_UNUSED_RESULT int _AesEcbEncrypt( AES_ECB_encrypt_AESNI(in, out, sz, (byte*)aes->key, (int)aes->rounds); } else -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_encrypt_AARCH64(in, out, (byte*)aes->key, (int)aes->rounds); - } - else #endif { #ifdef NEED_AES_TABLES @@ -11880,12 +11736,6 @@ static WARN_UNUSED_RESULT int _AesEcbDecrypt( AES_ECB_decrypt_AESNI(in, out, sz, (byte*)aes->key, (int)aes->rounds); } else -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_decrypt_AARCH64(in, out, (byte*)aes->key, (int)aes->rounds); - } - else #endif { #ifdef NEED_AES_TABLES @@ -12988,6 +12838,7 @@ void AES_XTS_decrypt_update_avx1(const unsigned char *in, unsigned char *out, wo #endif /* WOLFSSL_AESNI */ +#if !defined(WOLFSSL_ARMASM) || defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) #ifdef HAVE_AES_ECB /* helper function for encrypting / decrypting full buffer at once */ static WARN_UNUSED_RESULT int _AesXtsHelper( @@ -13249,13 +13100,6 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, RESTORE_VECTOR_REGISTERS(); } else -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_XTS_encrypt_AARCH64(xaes, out, in, sz, i); - ret = 0; - } - else #endif { ret = AesXtsEncrypt_sw(xaes, out, in, sz, i); @@ -13689,13 +13533,6 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, RESTORE_VECTOR_REGISTERS(); } else -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (aes->use_aes_hw_crypto) { - AES_XTS_decrypt_AARCH64(xaes, out, in, sz, i); - ret = 0; - } - else #endif { ret = AesXtsDecrypt_sw(xaes, out, in, sz, i); @@ -13893,6 +13730,8 @@ int wc_AesXtsDecryptFinal(XtsAes* xaes, byte* out, const byte* in, word32 sz, #endif /* WOLFSSL_AESXTS_STREAM */ +#endif /* !WOLFSSL_ARMASM || WOLFSSL_ARMASM_NO_HW_CRYPTO */ + /* Same as wc_AesXtsEncryptSector but the sector gets incremented by one every * sectorSz bytes * diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c index 2e63a092bf..67223860c8 100644 --- a/wolfcrypt/src/cpuid.c +++ b/wolfcrypt/src/cpuid.c @@ -28,8 +28,7 @@ #include -#if defined(HAVE_CPUID) || defined(HAVE_CPUID_INTEL) || \ - defined(HAVE_CPUID_AARCH64) +#if defined(HAVE_CPUID) || defined(HAVE_CPUID_INTEL) static word32 cpuid_check = 0; static word32 cpuid_flags = 0; #endif @@ -102,208 +101,6 @@ cpuid_check = 1; } } -#elif defined(HAVE_CPUID_AARCH64) - -#define CPUID_AARCH64_FEAT_AES ((word64)1 << 4) -#define CPUID_AARCH64_FEAT_PMULL ((word64)1 << 5) -#define CPUID_AARCH64_FEAT_SHA256 ((word64)1 << 12) -#define CPUID_AARCH64_FEAT_SHA256_512 ((word64)1 << 13) -#define CPUID_AARCH64_FEAT_RDM ((word64)1 << 28) -#define CPUID_AARCH64_FEAT_SHA3 ((word64)1 << 32) -#define CPUID_AARCH64_FEAT_SM3 ((word64)1 << 36) -#define CPUID_AARCH64_FEAT_SM4 
((word64)1 << 40) - -#ifdef WOLFSSL_AARCH64_PRIVILEGE_MODE - /* https://developer.arm.com/documentation/ddi0601/2024-09/AArch64-Registers - * /ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0 */ - - void cpuid_set_flags(void) - { - if (!cpuid_check) { - word64 features; - - __asm__ __volatile ( - "mrs %[feat], ID_AA64ISAR0_EL1\n" - : [feat] "=r" (features) - : - : - ); - - if (features & CPUID_AARCH64_FEAT_AES) - cpuid_flags |= CPUID_AES; - if (features & CPUID_AARCH64_FEAT_PMULL) - cpuid_flags |= CPUID_PMULL; - if (features & CPUID_AARCH64_FEAT_SHA256) - cpuid_flags |= CPUID_SHA256; - if (features & CPUID_AARCH64_FEAT_SHA256_512) - cpuid_flags |= CPUID_SHA256 | CPUID_SHA512; - if (features & CPUID_AARCH64_FEAT_RDM) - cpuid_flags |= CPUID_RDM; - if (features & CPUID_AARCH64_FEAT_SHA3) - cpuid_flags |= CPUID_SHA3; - if (features & CPUID_AARCH64_FEAT_SM3) - cpuid_flags |= CPUID_SM3; - if (features & CPUID_AARCH64_FEAT_SM4) - cpuid_flags |= CPUID_SM4; - - cpuid_check = 1; - } - } -#elif defined(__linux__) - /* https://community.arm.com/arm-community-blogs/b/operating-systems-blog/ - * posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu */ - - #include - #include - - void cpuid_set_flags(void) - { - if (!cpuid_check) { - word64 hwcaps = getauxval(AT_HWCAP); - - if (hwcaps & HWCAP_AES) - cpuid_flags |= CPUID_AES; - if (hwcaps & HWCAP_PMULL) - cpuid_flags |= CPUID_PMULL; - if (hwcaps & HWCAP_SHA2) - cpuid_flags |= CPUID_SHA256; - if (hwcaps & HWCAP_SHA512) - cpuid_flags |= CPUID_SHA512; - if (hwcaps & HWCAP_ASIMDRDM) - cpuid_flags |= CPUID_RDM; - if (hwcaps & HWCAP_SHA3) - cpuid_flags |= CPUID_SHA3; - if (hwcaps & HWCAP_SM3) - cpuid_flags |= CPUID_SM3; - if (hwcaps & HWCAP_SM4) - cpuid_flags |= CPUID_SM4; - - cpuid_check = 1; - } - } -#elif defined(__ANDROID__) || defined(ANDROID) - /* https://community.arm.com/arm-community-blogs/b/operating-systems-blog/ - * posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu */ - - #include "cpu-features.h" - - void cpuid_set_flags(void) - { - if (!cpuid_check) { - word64 features = android_getCpuFeatures(); - - if (features & ANDROID_CPU_ARM_FEATURE_AES) - cpuid_flags |= CPUID_AES; - if (features & ANDROID_CPU_ARM_FEATURE_PMULL) - cpuid_flags |= CPUID_PMULL; - if (features & ANDROID_CPU_ARM_FEATURE_SHA2) - cpuid_flags |= CPUID_SHA256; - - cpuid_check = 1; - } - } -#elif defined(__APPLE__) - /* https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/ - * determining_instruction_set_characteristics */ - - #include - - static word64 cpuid_get_sysctlbyname(const char* name) - { - word64 ret = 0; - size_t size = sizeof(ret); - - sysctlbyname(name, &ret, &size, NULL, 0); - - return ret; - } - - void cpuid_set_flags(void) - { - if (!cpuid_check) { - if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_AES") != 0) - cpuid_flags |= CPUID_AES; - if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_PMULL") != 0) - cpuid_flags |= CPUID_PMULL; - if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA256") != 0) - cpuid_flags |= CPUID_SHA256; - if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA512") != 0) - cpuid_flags |= CPUID_SHA512; - if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_RDM") != 0) - cpuid_flags |= CPUID_RDM; - if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA3") != 0) - cpuid_flags |= CPUID_SHA3; - #ifdef WOLFSSL_ARMASM_CRYPTO_SM3 - cpuid_flags |= CPUID_SM3; - #endif - #ifdef WOLFSSL_ARMASM_CRYPTO_SM4 - cpuid_flags |= CPUID_SM4; - #endif - - cpuid_check = 1; - } - } -#elif defined(__FreeBSD__) || defined(__OpenBSD__) - /* 
https://man.freebsd.org/cgi/man.cgi?elf_aux_info(3) */ - - #include - - void cpuid_set_flags(void) - { - if (!cpuid_check) { - word64 features = 0; - - elf_aux_info(AT_HWCAP, &features, sizeof(features)); - - if (features & CPUID_AARCH64_FEAT_AES) - cpuid_flags |= CPUID_AES; - if (features & CPUID_AARCH64_FEAT_PMULL) - cpuid_flags |= CPUID_PMULL; - if (features & CPUID_AARCH64_FEAT_SHA256) - cpuid_flags |= CPUID_SHA256; - if (features & CPUID_AARCH64_FEAT_SHA256_512) - cpuid_flags |= CPUID_SHA256 | CPUID_SHA512; - if (features & CPUID_AARCH64_FEAT_RDM) - cpuid_flags |= CPUID_RDM; - if (features & CPUID_AARCH64_FEAT_SHA3) - cpuid_flags |= CPUID_SHA3; - if (features & CPUID_AARCH64_FEAT_SM3) - cpuid_flags |= CPUID_SM3; - if (features & CPUID_AARCH64_FEAT_SM4) - cpuid_flags |= CPUID_SM4; - - cpuid_check = 1; - } - } -#else - void cpuid_set_flags(void) - { - if (!cpuid_check) { - - #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO - cpuid_flags |= CPUID_AES; - cpuid_flags |= CPUID_PMULL; - cpuid_flags |= CPUID_SHA256; - #endif - #ifdef WOLFSSL_ARMASM_CRYPTO_SHA512 - cpuid_flags |= CPUID_SHA512; - #endif - #ifndef WOLFSSL_AARCH64_NO_SQRMLSH - cpuid_flags |= CPUID_RDM; - #endif - #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 - cpuid_flags |= CPUID_SHA3; - #endif - #ifdef WOLFSSL_ARMASM_CRYPTO_SM3 - cpuid_flags |= CPUID_SM3; - #endif - #ifdef WOLFSSL_ARMASM_CRYPTO_SM4 - cpuid_flags |= CPUID_SM4; - #endif - cpuid_check = 1; - } - } -#endif #elif defined(HAVE_CPUID) void cpuid_set_flags(void) { diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index 9ae90e8cfa..0eca6775e8 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -175,20 +175,48 @@ static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) #endif /* HAVE_AESGCM */ -int AES_set_key_AARCH64(const unsigned char *userKey, const int keylen, - Aes* aes, int dir) +/* Similar to wolfSSL software implementation of expanding the AES key. + * Changed out the locations of where table look ups where made to + * use hardware instruction. Also altered decryption key to match. 
*/ +int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) { word32 temp; - word32* rk = aes->key; + word32 *rk; unsigned int i = 0; +#if defined(AES_MAX_KEY_SIZE) + const word32 max_key_len = (AES_MAX_KEY_SIZE / 8); +#endif + + if (!((keylen == 16) || (keylen == 24) || (keylen == 32)) || + aes == NULL || userKey == NULL) + return BAD_FUNC_ARG; + + rk = aes->key; +#if defined(AES_MAX_KEY_SIZE) + /* Check key length */ + if (keylen > max_key_len) { + return BAD_FUNC_ARG; + } +#endif + + #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \ + defined(WOLFSSL_AES_OFB) || defined(WOLFSSL_AES_XTS) + aes->left = 0; + #endif /* WOLFSSL_AES_COUNTER */ + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; XMEMCPY(rk, userKey, keylen); - switch (keylen) { + switch(keylen) + { #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \ defined(WOLFSSL_AES_128) case 16: - while (1) { + while (1) + { temp = rk[3]; SBOX(temp); temp = rotrFixed(temp, 8); @@ -207,7 +235,8 @@ int AES_set_key_AARCH64(const unsigned char *userKey, const int keylen, defined(WOLFSSL_AES_192) case 24: /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */ - while (1) { + while (1) + { temp = rk[5]; SBOX(temp); temp = rotrFixed(temp, 8); @@ -227,7 +256,8 @@ int AES_set_key_AARCH64(const unsigned char *userKey, const int keylen, #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \ defined(WOLFSSL_AES_256) case 32: - while (1) { + while (1) + { temp = rk[7]; SBOX(temp); temp = rotrFixed(temp, 8); @@ -253,7 +283,8 @@ int AES_set_key_AARCH64(const unsigned char *userKey, const int keylen, return BAD_FUNC_ARG; } - if (dir == AES_DECRYPTION) { + if (dir == AES_DECRYPTION) + { #ifdef HAVE_AES_DECRYPT unsigned int j; rk = aes->key; @@ -277,10 +308,9 @@ int AES_set_key_AARCH64(const unsigned char *userKey, const int keylen, #endif /* HAVE_AES_DECRYPT */ } - return 0; + return wc_AesSetIV(aes, iv); } -#ifndef __aarch64__ #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) @@ -302,545 +332,611 @@ int wc_AesSetIV(Aes* aes, const byte* iv) return 0; } -#endif + #ifdef __aarch64__ /* AES CCM/GCM use encrypt direct but not decrypt */ #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ - defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ - defined(HAVE_AES_CBC) + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + word32* keyPt = aes->key; -void AES_encrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) -{ - /* - AESE exor's input with round key - shift rows of exor'ed result - sub bytes for shifted rows - */ + /* + AESE exor's input with round key + shift rows of exor'ed result + sub bytes for shifted rows + */ - __asm__ __volatile__ ( - "LD1 {v0.16b}, [%[in]] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" + __asm__ __volatile__ ( + "LD1 {v0.16b}, [%[CtrIn]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC 
v0.16b, v0.16b \n" + + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + + "#subtract rounds done so far and see if should continue\n" + "MOV w12, %w[R] \n" + "SUB w12, w12, #10 \n" + "CBZ w12, 1f \n" + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + + "SUB w12, w12, #2 \n" + "CBZ w12, 1f \n" + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" + "#Final AddRoundKey then store result \n" + "1: \n" + "LD1 {v1.2d}, [%[Key]], #16 \n" + "EOR v0.16b, v0.16b, v1.16b \n" + "ST1 {v0.16b}, [%[CtrOut]] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" + :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds), + "=r" (inBlock) + :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds), + [CtrIn] "3" (inBlock) + : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4" + ); - "LD1 {v1.2d-v2.2d}, [%[key]], #32 \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" + return 0; + } +#endif /* AES_GCM, AES_CCM, DIRECT or COUNTER */ +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + #ifdef HAVE_AES_DECRYPT + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + word32* keyPt = aes->key; - "#subtract rounds done so far and see if should continue\n" - "MOV w12, %w[nr] \n" - "SUB w12, w12, #10 \n" - "CBZ w12, 1f \n" - "LD1 {v1.2d-v2.2d}, [%[key]], #32 \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" + /* + AESE exor's input with round key + shift rows of exor'ed result + sub bytes for shifted rows + */ - "SUB w12, w12, #2 \n" - "CBZ w12, 1f \n" - "LD1 {v1.2d-v2.2d}, [%[key]], #32 \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" + __asm__ __volatile__ ( + "LD1 {v0.16b}, [%[CtrIn]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + + "#subtract rounds done so far and see if should continue\n" + "MOV w12, %w[R] \n" + "SUB w12, w12, #10 \n" + "CBZ w12, 1f \n" + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + + "SUB w12, w12, #2 \n" + "CBZ w12, 1f \n" + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESIMC v0.16b, v0.16b \n" + 
"AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" - "#Final AddRoundKey then store result \n" - "1: \n" - "LD1 {v1.2d}, [%[key]], #16 \n" - "EOR v0.16b, v0.16b, v1.16b \n" - "ST1 {v0.16b}, [%[out]] \n" + "#Final AddRoundKey then store result \n" + "1: \n" + "LD1 {v1.2d}, [%[Key]], #16 \n" + "EOR v0.16b, v0.16b, v1.16b \n" + "ST1 {v0.4s}, [%[CtrOut]] \n" - : [key] "+r" (key) - : [in] "r" (inBlock), [out] "r" (outBlock), [nr] "r" (nr) - : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4" - ); -} -#endif /* AES_GCM, AES_CCM, DIRECT or COUNTER */ -#if !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || \ - defined(WOLFSSL_AES_COUNTER) -#ifdef HAVE_AES_DECRYPT -void AES_decrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) -{ - /* - AESE exor's input with round key - shift rows of exor'ed result - sub bytes for shifted rows - */ + :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds), + "=r" (inBlock) + :[Key] "1" (aes->key), "0" (outBlock), [R] "2" (aes->rounds), + [CtrIn] "3" (inBlock) + : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4" + ); - __asm__ __volatile__ ( - "LD1 {v0.16b}, [%[in]] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" - - "AESD v0.16b, v1.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v2.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v3.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v4.16b \n" - "AESIMC v0.16b, v0.16b \n" - - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" - "AESD v0.16b, v1.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v2.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v3.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v4.16b \n" - "AESIMC v0.16b, v0.16b \n" - - "LD1 {v1.2d-v2.2d}, [%[key]], #32 \n" - "AESD v0.16b, v1.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v2.16b \n" - - "#subtract rounds done so far and see if should continue\n" - "MOV w12, %w[nr] \n" - "SUB w12, w12, #10 \n" - "CBZ w12, 1f \n" - "LD1 {v1.2d-v2.2d}, [%[key]], #32 \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v1.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v2.16b \n" - - "SUB w12, w12, #2 \n" - "CBZ w12, 1f \n" - "LD1 {v1.2d-v2.2d}, [%[key]], #32 \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v1.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v2.16b \n" - - "#Final AddRoundKey then store result \n" - "1: \n" - "LD1 {v1.2d}, [%[key]], #16 \n" - "EOR v0.16b, v0.16b, v1.16b \n" - "ST1 {v0.4s}, [%[out]] \n" - - : [key] "+r" (key) - : [in] "r" (inBlock), [out] "r" (outBlock), [nr] "r" (nr) - : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4" - ); + return 0; } -#endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AES_DECRYPT */ #endif /* DIRECT or COUNTER */ /* AES-CBC */ #ifdef HAVE_AES_CBC -void AES_CBC_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, - byte* key, int rounds) -{ - word32 numBlocks = sz / AES_BLOCK_SIZE; + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + word32 numBlocks = sz / AES_BLOCK_SIZE; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (sz == 0) { + return 0; + } + +#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + if (sz % AES_BLOCK_SIZE) { + return BAD_LENGTH_E; + } +#endif - /* - AESE exor's input with round key - shift rows of exor'ed result + /* do as many block size ops as possible */ + if (numBlocks > 0) { + word32* key = aes->key; + word32* reg = aes->reg; + /* + AESE exor's input with round key + shift rows of exor'ed result sub bytes for shifted rows - note: grouping AESE & 
AESMC together as pairs reduces latency - */ - switch (rounds) { + note: grouping AESE & AESMC together as pairs reduces latency + */ + switch(aes->rounds) { #ifdef WOLFSSL_AES_128 - case 10: /* AES 128 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" - "LD1 {v5.2d-v8.2d}, [%[key]], #64 \n" - "LD1 {v9.2d-v11.2d},[%[key]], #48 \n" - "LD1 {v0.2d}, [%[reg]] \n" - - "LD1 {v12.2d}, [%[in]], #16 \n" - "1:\n" - "#CBC operations, xorbuf in with current reg \n" - "EOR v0.16b, v0.16b, v12.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "SUB w11, w11, #1 \n" - "EOR v0.16b, v0.16b, v11.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - - "CBZ w11, 2f \n" - "LD1 {v12.2d}, [%[in]], #16 \n" - "B 1b \n" - - "2:\n" - "#store current counter value at the end \n" - "ST1 {v0.2d}, [%[reg]] \n" - - : [out] "+r" (out), [in] "+r" (in), [key] "+r" (key) - : [reg] "r" (reg), [blocks] "r" (numBlocks) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" - ); - break; + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d},[%[Key]], #48 \n" + "LD1 {v0.2d}, [%[reg]] \n" + + "LD1 {v12.2d}, [%[input]], #16 \n" + "1:\n" + "#CBC operations, xorbuf in with current aes->reg \n" + "EOR v0.16b, v0.16b, v12.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "SUB w11, w11, #1 \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "ST1 {v0.2d}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" + ); + break; #endif /* WOLFSSL_AES_128 */ #ifdef WOLFSSL_AES_192 - case 12: /* AES 192 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" - "LD1 {v5.2d-v8.2d}, [%[key]], #64 \n" - "LD1 {v9.2d-v12.2d},[%[key]], #64 \n" - "LD1 {v13.2d}, [%[key]], #16 \n" - "LD1 {v0.2d}, [%[reg]] \n" - - "LD1 {v14.2d}, [%[in]], #16 \n" - "1:\n" - "#CBC operations, xorbuf in with current reg \n" - "EOR v0.16b, v0.16b, v14.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b 
\n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v12.16b \n" - "EOR v0.16b, v0.16b, v13.16b \n" - "SUB w11, w11, #1 \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - - "CBZ w11, 2f \n" - "LD1 {v14.2d}, [%[in]], #16\n" - "B 1b \n" - - "2:\n" - "#store current counter value at the end \n" - "ST1 {v0.2d}, [%[reg]] \n" - - : [out] "+r" (out), [in] "+r" (in), [key] "+r" (key) - : [reg] "r" (reg), [blocks] "r" (numBlocks) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" - ); - break; + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, %[Key], #64 \n" + "LD1 {v5.2d-v8.2d}, %[Key], #64 \n" + "LD1 {v9.2d-v12.2d},%[Key], #64 \n" + "LD1 {v13.2d}, %[Key], #16 \n" + "LD1 {v0.2d}, %[reg] \n" + + "LD1 {v14.2d}, [%[input]], #16 \n" + "1:\n" + "#CBC operations, xorbuf in with current aes->reg \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v14.2d}, [%[input]], #16\n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "ST1 {v0.2d}, %[regOut] \n" + + + :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in) + :"0" (out), [Key] "m" (aes->key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" + ); + break; #endif /* WOLFSSL_AES_192*/ #ifdef WOLFSSL_AES_256 - case 14: /* AES 256 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, %[Key], #64 \n" - "LD1 {v5.2d-v8.2d}, [%[key]], #64 \n" - "LD1 {v9.2d-v12.2d}, [%[key]], #64 \n" - "LD1 {v13.2d-v15.2d}, [%[key]], #48 \n" - "LD1 {v0.2d}, [%[reg]] \n" + "LD1 {v5.2d-v8.2d}, %[Key], #64 \n" + "LD1 {v9.2d-v12.2d}, %[Key], #64 \n" + "LD1 {v13.2d-v15.2d}, %[Key], #48 \n" + "LD1 {v0.2d}, %[reg] \n" - "LD1 {v16.2d}, [%[in]], #16 \n" - "1: \n" - "#CBC operations, xorbuf in with current reg \n" - "EOR v0.16b, v0.16b, v16.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, 
v0.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v12.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v13.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v14.16b \n" - "EOR v0.16b, v0.16b, v15.16b \n" - "SUB w11, w11, #1 \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - - "CBZ w11, 2f \n" - "LD1 {v16.2d}, [%[in]], #16 \n" - "B 1b \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "1: \n" + "#CBC operations, xorbuf in with current aes->reg \n" + "EOR v0.16b, v0.16b, v16.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "B 1b \n" - "2: \n" - "#store current counter value at the end \n" - "ST1 {v0.2d}, [%[reg]] \n" - - : [out] "+r" (out), [in] "+r" (in), [key] "+r" (key) - : [reg] "r" (reg), [blocks] "r" (numBlocks) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15", - "v16" - ); - break; + "2: \n" + "#store current counter value at the end \n" + "ST1 {v0.2d}, %[regOut] \n" + + + :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in) + :"0" (out), [Key] "m" (aes->key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15", + "v16" + ); + break; #endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-CBC round value"); + return BAD_FUNC_ARG; + } + } + + return 0; } -} -#ifdef HAVE_AES_DECRYPT -void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, - byte* reg, byte* key, int rounds) -{ - word32 numBlocks = sz / AES_BLOCK_SIZE; + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + word32 numBlocks = sz / AES_BLOCK_SIZE; - switch (rounds) { -#ifdef WOLFSSL_AES_128 - case 10: /* AES 128 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" - "LD1 {v5.2d-v8.2d}, [%[key]], #64 \n" - "LD1 {v9.2d-v11.2d},[%[key]], #48 \n" - "LD1 {v13.2d}, [%[reg]] \n" - - "1:\n" - "LD1 {v0.2d}, [%[in]], #16 \n" - "MOV v12.16b, v0.16b \n" - "AESD v0.16b, v1.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v2.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v3.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v4.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v5.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v6.16b \n" - "AESIMC 
v0.16b, v0.16b \n" - "AESD v0.16b, v7.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v8.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v9.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v10.16b \n" - "EOR v0.16b, v0.16b, v11.16b \n" - - "EOR v0.16b, v0.16b, v13.16b \n" - "SUB w11, w11, #1 \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "MOV v13.16b, v12.16b \n" - - "CBZ w11, 2f \n" - "B 1b \n" + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } - "2: \n" - "#store current counter value at the end \n" - "ST1 {v13.2d}, [%[reg]] \n" + if (sz == 0) { + return 0; + } - : [out] "+r" (out), [in] "+r" (in), [key] "+r" (key) - : [reg] "r" (reg), [blocks] "r" (numBlocks) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" - ); - break; -#endif /* WOLFSSL_AES_128 */ -#ifdef WOLFSSL_AES_192 - case 12: /* AES 192 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" - "LD1 {v5.2d-v8.2d}, [%[key]], #64 \n" - "LD1 {v9.2d-v12.2d},[%[key]], #64 \n" - "LD1 {v13.16b}, [%[key]], #16 \n" - "LD1 {v15.2d}, [%[reg]] \n" - - "LD1 {v0.2d}, [%[in]], #16 \n" - "1: \n" - "MOV v14.16b, v0.16b \n" - "AESD v0.16b, v1.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v2.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v3.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v4.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v5.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v6.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v7.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v8.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v9.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v10.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v11.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v12.16b \n" - "EOR v0.16b, v0.16b, v13.16b \n" - - "EOR v0.16b, v0.16b, v15.16b \n" - "SUB w11, w11, #1 \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "MOV v15.16b, v14.16b \n" - - "CBZ w11, 2f \n" - "LD1 {v0.2d}, [%[in]], #16 \n" - "B 1b \n" - - "2:\n" - "#store current counter value at the end \n" - "ST1 {v15.2d}, [%[reg]] \n" - - : [out] "+r" (out), [in] "+r" (in), [key] "+r" (key) - : [reg] "r" (reg), [blocks] "r" (numBlocks) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15" - ); - break; -#endif /* WOLFSSL_AES_192 */ -#ifdef WOLFSSL_AES_256 - case 14: /* AES 256 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[key]], #64 \n" - "LD1 {v5.2d-v8.2d}, [%[key]], #64 \n" - "LD1 {v9.2d-v12.2d}, [%[key]], #64 \n" - "LD1 {v13.2d-v15.2d}, [%[key]], #48 \n" - "LD1 {v17.2d}, [%[reg]] \n" - - "LD1 {v0.2d}, [%[in]], #16 \n" - "1: \n" - "MOV v16.16b, v0.16b \n" - "AESD v0.16b, v1.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v2.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v3.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v4.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v5.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v6.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v7.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v8.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v9.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v10.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v11.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v12.16b \n" - "AESIMC v0.16b, v0.16b \n" - "AESD v0.16b, v13.16b \n" - "AESIMC 
v0.16b, v0.16b \n" - "AESD v0.16b, v14.16b \n" - "EOR v0.16b, v0.16b, v15.16b \n" - - "EOR v0.16b, v0.16b, v17.16b \n" - "SUB w11, w11, #1 \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "MOV v17.16b, v16.16b \n" - - "CBZ w11, 2f \n" - "LD1 {v0.2d}, [%[in]], #16 \n" - "B 1b \n" - - "2:\n" - "#store current counter value at the end \n" - "ST1 {v17.2d}, [%[reg]] \n" - - : [out] "+r" (out), [in] "+r" (in), [key] "+r" (key) - : [reg] "r" (reg), [blocks] "r" (numBlocks) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15", - "v16", "v17" - ); - break; -#endif /* WOLFSSL_AES_256 */ - } -} + if (sz % AES_BLOCK_SIZE) { +#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + return BAD_LENGTH_E; +#else + return BAD_FUNC_ARG; #endif + } -#endif /* HAVE_AES_CBC */ + /* do as many block size ops as possible */ + if (numBlocks > 0) { + word32* key = aes->key; + word32* reg = aes->reg; -/* AES-CTR */ -#ifdef WOLFSSL_AES_COUNTER -static void wc_aes_ctr_encrypt_asm(Aes* aes, byte* out, const byte* in, - byte* keyPt, word32 numBlocks) -{ - switch(aes->rounds) { + switch(aes->rounds) { #ifdef WOLFSSL_AES_128 - case 10: /* AES 128 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d},[%[Key]], #48 \n" + "LD1 {v13.2d}, [%[reg]] \n" - "#Create vector with the value 1 \n" - "MOVI v15.16b, #1 \n" - "USHR v15.2d, v15.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v14.16b, v14.16b, v14.16b \n" - "EXT v14.16b, v15.16b, v14.16b, #8\n" + "1:\n" + "LD1 {v0.2d}, [%[input]], #16 \n" + "MOV v12.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v5.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v6.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v7.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v8.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v9.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + + "EOR v0.16b, v0.16b, v13.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v13.16b, v12.16b \n" + + "CBZ w11, 2f \n" + "B 1b \n" - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "LD1 {v13.2d}, %[reg] \n" + "2: \n" + "#store current counter value at the end \n" + "ST1 {v13.2d}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" + ); + break; +#endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v12.2d},[%[Key]], #64 \n" + "LD1 {v13.16b}, [%[Key]], #16 \n" + "LD1 {v15.2d}, [%[reg]] \n" + + "LD1 {v0.2d}, [%[input]], #16 \n" + "1: \n" + "MOV v14.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + 
"AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v5.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v6.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v7.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v8.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v9.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v10.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v11.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + + "EOR v0.16b, v0.16b, v15.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v15.16b, v14.16b \n" + + "CBZ w11, 2f \n" + "LD1 {v0.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "ST1 {v15.2d}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15" + ); + break; +#endif /* WOLFSSL_AES_192 */ +#ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" + "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" + "LD1 {v17.2d}, [%[reg]] \n" + + "LD1 {v0.2d}, [%[input]], #16 \n" + "1: \n" + "MOV v16.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v5.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v6.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v7.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v8.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v9.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v10.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v11.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v12.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v13.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + + "EOR v0.16b, v0.16b, v17.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v17.16b, v16.16b \n" + + "CBZ w11, 2f \n" + "LD1 {v0.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "ST1 {v17.2d}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15", + "v16", "v17" + ); + break; +#endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-CBC round value"); + return BAD_FUNC_ARG; + } + } + + return 0; + } + #endif + +#endif /* HAVE_AES_CBC */ + +/* AES-CTR */ +#ifdef WOLFSSL_AES_COUNTER +static void wc_aes_ctr_encrypt_asm(Aes* aes, byte* out, const byte* in, + byte* keyPt, word32 numBlocks) +{ + switch(aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "#Create vector with the value 1 \n" + "MOVI v15.16b, #1 \n" + "USHR v15.2d, v15.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "EXT v14.16b, v15.16b, v14.16b, 
#8\n" + + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "LD1 {v13.2d}, %[reg] \n" /* double block */ "1: \n" @@ -1324,11 +1420,40 @@ static void wc_aes_ctr_encrypt_asm(Aes* aes, byte* out, const byte* in, } } -void AES_CTR_encrypt_AARCH64(Aes* aes, byte* out, const byte* in, word32 sz) +int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { byte* tmp; word32 numBlocks; + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + switch(aes->rounds) { + #ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + #endif /* WOLFSSL_AES_256 */ + break; + default: + WOLFSSL_MSG("Bad AES-CTR round value"); + return BAD_FUNC_ARG; + } + + + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + + /* consume any unused bytes left in aes->tmp */ + while ((aes->left != 0) && (sz != 0)) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + /* do as many block size ops as possible */ numBlocks = sz / AES_BLOCK_SIZE; if (numBlocks > 0) { @@ -1353,6 +1478,14 @@ void AES_CTR_encrypt_AARCH64(Aes* aes, byte* out, const byte* in, word32 sz) aes->left--; } } + return 0; +} + +int wc_AesCtrSetKey(Aes* aes, const byte* key, word32 len, + const byte* iv, int dir) +{ + (void)dir; + return wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); } #endif /* WOLFSSL_AES_COUNTER */ @@ -1367,7 +1500,7 @@ void AES_CTR_encrypt_AARCH64(Aes* aes, byte* out, const byte* in, word32 sz) /* PMULL and RBIT only with AArch64 */ /* Use ARM hardware for polynomial multiply */ -void GMULT_AARCH64(byte* X, byte* Y) +void GMULT(byte* X, byte* Y) { __asm__ volatile ( "LD1 {v0.16b}, [%[X]] \n" @@ -1399,7 +1532,7 @@ void GMULT_AARCH64(byte* X, byte* Y) ); } -static void GHASH_AARCH64(Gcm* gcm, const byte* a, word32 aSz, const byte* c, +void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz, byte* s, word32 sSz) { byte scratch[AES_BLOCK_SIZE]; @@ -1641,416 +1774,137 @@ static void GHASH_AARCH64(Gcm* gcm, const byte* a, word32 aSz, const byte* c, "# x[0-2] += C * H^2 \n" "PMULL v8.1q, v12.1d, v4.1d \n" "PMULL2 v9.1q, v12.2d, v4.2d \n" - "EOR v0.16b, v0.16b, v8.16b \n" - "EOR v1.16b, v1.16b, v9.16b \n" - "EXT v12.16b, v12.16b, v12.16b, #8 \n" - "PMULL v9.1q, v12.1d, v4.1d \n" - "PMULL2 v12.1q, v12.2d, v4.2d \n" -#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 - "EOR3 v2.16b, v2.16b, v12.16b, v9.16b \n" -#else - "EOR v12.16b, v12.16b, v9.16b \n" - "EOR v2.16b, v2.16b, v12.16b \n" -#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ - "# x[0-2] += C * H^3 \n" - "PMULL v8.1q, v11.1d, v5.1d \n" - "PMULL2 v9.1q, v11.2d, v5.2d \n" - "EOR v0.16b, v0.16b, v8.16b \n" - "EOR v1.16b, v1.16b, v9.16b \n" - "EXT v11.16b, v11.16b, v11.16b, #8 \n" - "PMULL v9.1q, v11.1d, v5.1d \n" - "PMULL2 v11.1q, v11.2d, v5.2d \n" -#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 - "EOR3 v2.16b, v2.16b, v11.16b, v9.16b \n" -#else - "EOR v11.16b, v11.16b, v9.16b \n" - "EOR v2.16b, v2.16b, v11.16b \n" -#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ - "# x[0-2] += C * H^4 \n" - "PMULL v8.1q, v10.1d, v6.1d \n" - "PMULL2 v9.1q, v10.2d, v6.2d \n" - "EOR v0.16b, v0.16b, v8.16b \n" - "EOR v1.16b, v1.16b, v9.16b \n" - "EXT v10.16b, v10.16b, v10.16b, #8 \n" - "PMULL v9.1q, v10.1d, v6.1d \n" - "PMULL2 v10.1q, v10.2d, v6.2d \n" -#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 - "EOR3 v2.16b, v2.16b, v10.16b, v9.16b \n" -#else - "EOR v10.16b, v10.16b, v9.16b \n" - "EOR v2.16b, v2.16b, v10.16b \n" -#endif /* 
WOLFSSL_ARMASM_CRYPTO_SHA3 */ - "# Reduce X = x[0-2] \n" - "EXT v9.16b, v0.16b, v1.16b, #8 \n" - "PMULL2 v8.1q, v1.2d, v7.2d \n" -#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 - "EOR3 v9.16b, v9.16b, v2.16b, v8.16b \n" -#else - "EOR v9.16b, v9.16b, v2.16b \n" -#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ -#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 - "EOR v9.16b, v9.16b, v8.16b \n" -#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ - "PMULL2 v8.1q, v9.2d, v7.2d \n" - "MOV v0.D[1], v9.D[0] \n" - "EOR v0.16b, v0.16b, v8.16b \n" - "CMP x12, #64 \n" - "BGE 114b \n" - "CBZ x12, 120f \n" - "115: \n" - "CMP x12, #16 \n" - "BLT 112f \n" - "111: \n" - "LD1 {v14.2d}, [%[c]], #16 \n" - "SUB x12, x12, #16 \n" - "RBIT v14.16b, v14.16b \n" - "EOR v0.16b, v0.16b, v14.16b \n" - "PMULL v10.1q, v0.1d, v3.1d \n" - "PMULL2 v11.1q, v0.2d, v3.2d \n" - "EXT v12.16b, v3.16b, v3.16b, #8 \n" - "PMULL v13.1q, v0.1d, v12.1d \n" - "PMULL2 v12.1q, v0.2d, v12.2d \n" - "EOR v12.16b, v12.16b, v13.16b \n" - "EXT v13.16b, v10.16b, v11.16b, #8 \n" - "EOR v13.16b, v13.16b, v12.16b \n" - "# Reduce \n" - "PMULL2 v12.1q, v11.2d, v7.2d \n" - "EOR v13.16b, v13.16b, v12.16b \n" - "PMULL2 v12.1q, v13.2d, v7.2d \n" - "MOV v10.D[1], v13.D[0] \n" - "EOR v0.16b, v10.16b, v12.16b \n" - "CMP x12, #16 \n" - "BGE 111b \n" - "CBZ x12, 120f \n" - "112: \n" - "# Partial cipher text \n" - "EOR v14.16b, v14.16b, v14.16b \n" - "MOV x14, x12 \n" - "ST1 {v14.2d}, [%[scratch]] \n" - "113: \n" - "LDRB w13, [%[c]], #1 \n" - "STRB w13, [%[scratch]], #1 \n" - "SUB x14, x14, #1 \n" - "CBNZ x14, 113b \n" - "SUB %[scratch], %[scratch], x12 \n" - "LD1 {v14.2d}, [%[scratch]] \n" - "RBIT v14.16b, v14.16b \n" - "EOR v0.16b, v0.16b, v14.16b \n" - "PMULL v10.1q, v0.1d, v3.1d \n" - "PMULL2 v11.1q, v0.2d, v3.2d \n" - "EXT v12.16b, v3.16b, v3.16b, #8 \n" - "PMULL v13.1q, v0.1d, v12.1d \n" - "PMULL2 v12.1q, v0.2d, v12.2d \n" - "EOR v12.16b, v12.16b, v13.16b \n" - "EXT v13.16b, v10.16b, v11.16b, #8 \n" - "EOR v13.16b, v13.16b, v12.16b \n" - "# Reduce \n" - "PMULL2 v12.1q, v11.2d, v7.2d \n" - "EOR v13.16b, v13.16b, v12.16b \n" - "PMULL2 v12.1q, v13.2d, v7.2d \n" - "MOV v10.D[1], v13.D[0] \n" - "EOR v0.16b, v10.16b, v12.16b \n" - "120: \n" - "RBIT v0.16b, v0.16b \n" - "LSL %x[aSz], %x[aSz], #3 \n" - "LSL %x[cSz], %x[cSz], #3 \n" - "MOV v10.D[0], %x[aSz] \n" - "MOV v10.D[1], %x[cSz] \n" - "REV64 v10.16b, v10.16b \n" - "EOR v0.16b, v0.16b, v10.16b \n" - "ST1 {v0.16b}, [%[scratch]] \n" - : [cSz] "+r" (cSz), [c] "+r" (c), [aSz] "+r" (aSz), [a] "+r" (a) - : [scratch] "r" (scratch), [h] "m" (gcm->H) - : "cc", "memory", "w12", "w13", "x14", - "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v8", "v9", "v10", "v11", "v12", "v13", "v14" - ); - - XMEMCPY(s, scratch, sSz); -} - -#ifdef WOLFSSL_AESGCM_STREAM - /* Access initialization counter data. */ - #define AES_INITCTR(aes) ((aes)->streamData + 0 * AES_BLOCK_SIZE) - /* Access counter data. */ - #define AES_COUNTER(aes) ((aes)->streamData + 1 * AES_BLOCK_SIZE) - /* Access tag data. */ - #define AES_TAG(aes) ((aes)->streamData + 2 * AES_BLOCK_SIZE) - /* Access last GHASH block. */ - #define AES_LASTGBLOCK(aes) ((aes)->streamData + 3 * AES_BLOCK_SIZE) - /* Access last encrypted block. */ - #define AES_LASTBLOCK(aes) ((aes)->streamData + 4 * AES_BLOCK_SIZE) - -/* GHASH one block of data. - * - * XOR block into tag and GMULT with H. - * - * @param [in, out] aes AES GCM object. - * @param [in] block Block of AAD or cipher text. 
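The PMULL/PMULL2 sequences above implement the GHASH multiply in GF(2^128): each 16-byte block of AAD or ciphertext is XORed into the running tag and the result is multiplied by the hash key H, with up to four powers of H folded per iteration. As a readable cross-check of what those instructions compute, here is a minimal bitwise sketch of the multiply from NIST SP 800-38D, Algorithm 1. Note the assembly instead keeps its operands bit-reflected (via RBIT) so that PMULL's carry-less multiply lines up with GCM's bit order; gf128_mul below is an illustrative name, not a function in this patch.

#include <string.h>

typedef unsigned char byte;

/* Reference GF(2^128) multiply, Z = X * H, in the bit order of
 * NIST SP 800-38D Algorithm 1. Illustrative only; the assembly above
 * computes the same product with PMULL on bit-reflected operands. */
static void gf128_mul(byte Z[16], const byte X[16], const byte H[16])
{
    byte V[16];
    byte R[16] = {0};
    int i, j;

    memcpy(V, H, 16);
    for (i = 0; i < 128; i++) {
        /* Accumulate V when bit i of X (MSB first) is set. */
        if (X[i / 8] & (0x80 >> (i % 8))) {
            for (j = 0; j < 16; j++)
                R[j] ^= V[j];
        }
        /* V = V * x modulo x^128 + x^7 + x^2 + x + 1. */
        {
            int carry = V[15] & 1;
            for (j = 15; j > 0; j--)
                V[j] = (byte)((V[j] >> 1) | (V[j - 1] << 7));
            V[0] >>= 1;
            if (carry)
                V[0] ^= 0xE1; /* 11100001 followed by 120 zero bits */
        }
    }
    memcpy(Z, R, 16);
}

GHASH is then just tag = gf128_mul(tag XOR block, H) over every block; the "120:" label above only XORs in the encoded bit lengths of the AAD and ciphertext, with the closing multiply applied by the separate GMULT() call made after GHASH() returns.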
- */ -#define GHASH_ONE_BLOCK_AARCH64(aes, block) \ - do { \ - xorbuf(AES_TAG(aes), block, AES_BLOCK_SIZE); \ - GMULT_AARCH64(AES_TAG(aes), aes->gcm.H); \ - } \ - while (0) - -/* Hash in the lengths of the AAD and cipher text in bits. - * - * Default implementation. - * - * @param [in, out] aes AES GCM object. - */ -#define GHASH_LEN_BLOCK_AARCH64(aes) \ - do { \ - byte scratch[AES_BLOCK_SIZE]; \ - FlattenSzInBits(&scratch[0], aes->aSz); \ - FlattenSzInBits(&scratch[8], aes->cSz); \ - GHASH_ONE_BLOCK_AARCH64(aes, scratch); \ - } \ - while (0) - -/* Update the GHASH with AAD and/or cipher text. - * - * @param [in,out] aes AES GCM object. - * @param [in] a Additional authentication data buffer. - * @param [in] aSz Size of data in AAD buffer. - * @param [in] c Cipher text buffer. - * @param [in] cSz Size of data in cipher text buffer. - */ -void GHASH_UPDATE_AARCH64(Aes* aes, const byte* a, word32 aSz, const byte* c, - word32 cSz) -{ - word32 blocks; - word32 partial; - - /* Hash in A, the Additional Authentication Data */ - if (aSz != 0 && a != NULL) { - /* Update count of AAD we have hashed. */ - aes->aSz += aSz; - /* Check if we have unprocessed data. */ - if (aes->aOver > 0) { - /* Calculate amount we can use - fill up the block. */ - byte sz = AES_BLOCK_SIZE - aes->aOver; - if (sz > aSz) { - sz = aSz; - } - /* Copy extra into last GHASH block array and update count. */ - XMEMCPY(AES_LASTGBLOCK(aes) + aes->aOver, a, sz); - aes->aOver += sz; - if (aes->aOver == AES_BLOCK_SIZE) { - /* We have filled up the block and can process. */ - GHASH_ONE_BLOCK_AARCH64(aes, AES_LASTGBLOCK(aes)); - /* Reset count. */ - aes->aOver = 0; - } - /* Used up some data. */ - aSz -= sz; - a += sz; - } - - /* Calculate number of blocks of AAD and the leftover. */ - blocks = aSz / AES_BLOCK_SIZE; - partial = aSz % AES_BLOCK_SIZE; - /* GHASH full blocks now. */ - while (blocks--) { - GHASH_ONE_BLOCK_AARCH64(aes, a); - a += AES_BLOCK_SIZE; - } - if (partial != 0) { - /* Cache the partial block. */ - XMEMCPY(AES_LASTGBLOCK(aes), a, partial); - aes->aOver = (byte)partial; - } - } - if (aes->aOver > 0 && cSz > 0 && c != NULL) { - /* No more AAD coming and we have a partial block. */ - /* Fill the rest of the block with zeros. */ - byte sz = AES_BLOCK_SIZE - aes->aOver; - XMEMSET(AES_LASTGBLOCK(aes) + aes->aOver, 0, sz); - /* GHASH last AAD block. */ - GHASH_ONE_BLOCK_AARCH64(aes, AES_LASTGBLOCK(aes)); - /* Clear partial count for next time through. */ - aes->aOver = 0; - } - - /* Hash in C, the Ciphertext */ - if (cSz != 0 && c != NULL) { - /* Update count of cipher text we have hashed. */ - aes->cSz += cSz; - if (aes->cOver > 0) { - /* Calculate amount we can use - fill up the block. */ - byte sz = AES_BLOCK_SIZE - aes->cOver; - if (sz > cSz) { - sz = cSz; - } - XMEMCPY(AES_LASTGBLOCK(aes) + aes->cOver, c, sz); - /* Update count of unused encrypted counter. */ - aes->cOver += sz; - if (aes->cOver == AES_BLOCK_SIZE) { - /* We have filled up the block and can process. */ - GHASH_ONE_BLOCK_AARCH64(aes, AES_LASTGBLOCK(aes)); - /* Reset count. */ - aes->cOver = 0; - } - /* Used up some data. */ - cSz -= sz; - c += sz; - } - - /* Calculate number of blocks of cipher text and the leftover. */ - blocks = cSz / AES_BLOCK_SIZE; - partial = cSz % AES_BLOCK_SIZE; - /* GHASH full blocks now. */ - while (blocks--) { - GHASH_ONE_BLOCK_AARCH64(aes, c); - c += AES_BLOCK_SIZE; - } - if (partial != 0) { - /* Cache the partial block. 
*/ - XMEMCPY(AES_LASTGBLOCK(aes), c, partial); - aes->cOver = (byte)partial; - } - } -} - -/* Finalize the GHASH calculation. - * - * Complete hashing cipher text and hash the AAD and cipher text lengths. - * - * @param [in, out] aes AES GCM object. - * @param [out] s Authentication tag. - * @param [in] sSz Size of authentication tag required. - */ -static void GHASH_FINAL_AARCH64(Aes* aes, byte* s, word32 sSz) -{ - /* AAD block incomplete when > 0 */ - byte over = aes->aOver; - - if (aes->cOver > 0) { - /* Cipher text block incomplete. */ - over = aes->cOver; - } - if (over > 0) { - /* Zeroize the unused part of the block. */ - XMEMSET(AES_LASTGBLOCK(aes) + over, 0, AES_BLOCK_SIZE - over); - /* Hash the last block of cipher text. */ - GHASH_ONE_BLOCK_AARCH64(aes, AES_LASTGBLOCK(aes)); - } - /* Hash in the lengths of AAD and cipher text in bits */ - GHASH_LEN_BLOCK_AARCH64(aes); - /* Copy the result into s. */ - XMEMCPY(s, AES_TAG(aes), sSz); -} - -void AES_GCM_init_AARCH64(Aes* aes, const byte* iv, word32 ivSz) -{ - ALIGN32 byte counter[AES_BLOCK_SIZE]; - - if (ivSz == GCM_NONCE_MID_SZ) { - /* Counter is IV with bottom 4 bytes set to: 0x00,0x00,0x00,0x01. */ - XMEMCPY(counter, iv, ivSz); - XMEMSET(counter + GCM_NONCE_MID_SZ, 0, - AES_BLOCK_SIZE - GCM_NONCE_MID_SZ - 1); - counter[AES_BLOCK_SIZE - 1] = 1; - } - else { - /* Counter is GHASH of IV. */ - #ifdef OPENSSL_EXTRA - word32 aadTemp = aes->gcm.aadLen; - aes->gcm.aadLen = 0; - #endif - GHASH_AARCH64(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT_AARCH64(counter, aes->gcm.H); - #ifdef OPENSSL_EXTRA - aes->gcm.aadLen = aadTemp; - #endif - } - - /* Copy in the counter for use with cipher. */ - XMEMCPY(AES_COUNTER(aes), counter, AES_BLOCK_SIZE); - /* Encrypt initial counter into a buffer for GCM. */ - AES_encrypt_AARCH64(counter, AES_INITCTR(aes), (byte*)aes->key, - (int)aes->rounds); -} - -void AES_GCM_crypt_update_AARCH64(Aes* aes, byte* out, const byte* in, - word32 sz) -{ - word32 blocks; - word32 partial; - - /* Check if previous encrypted block was not used up. */ - if (aes->over > 0) { - byte pSz = AES_BLOCK_SIZE - aes->over; - if (pSz > sz) pSz = sz; - - /* Use some/all of last encrypted block. */ - xorbufout(out, AES_LASTBLOCK(aes) + aes->over, in, pSz); - aes->over = (aes->over + pSz) & (AES_BLOCK_SIZE - 1); - - /* Some data used. */ - sz -= pSz; - in += pSz; - out += pSz; - } - - /* Calculate the number of blocks needing to be encrypted and any leftover. - */ - blocks = sz / AES_BLOCK_SIZE; - partial = sz & (AES_BLOCK_SIZE - 1); - - /* Encrypt block by block. */ - while (blocks--) { - ALIGN32 byte scratch[AES_BLOCK_SIZE]; - IncrementGcmCounter(AES_COUNTER(aes)); - /* Encrypt counter into a buffer. */ - AES_encrypt_AARCH64(AES_COUNTER(aes), scratch, (byte*)aes->key, - (int)aes->rounds); - /* XOR plain text into encrypted counter into cipher text buffer. */ - xorbufout(out, scratch, in, AES_BLOCK_SIZE); - /* Data complete. */ - in += AES_BLOCK_SIZE; - out += AES_BLOCK_SIZE; - } - - if (partial != 0) { - /* Generate an extra block and use up as much as needed. */ - IncrementGcmCounter(AES_COUNTER(aes)); - /* Encrypt counter into cache. */ - AES_encrypt_AARCH64(AES_COUNTER(aes), AES_LASTBLOCK(aes), - (byte*)aes->key, (int)aes->rounds); - /* XOR plain text into encrypted counter into cipher text buffer. */ - xorbufout(out, AES_LASTBLOCK(aes), in, partial); - /* Keep amount of encrypted block used. 
*/ - aes->over = partial; - } -} + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v9.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v12.16b, v9.16b \n" +#else + "EOR v12.16b, v12.16b, v9.16b \n" + "EOR v2.16b, v2.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v8.1q, v11.1d, v5.1d \n" + "PMULL2 v9.1q, v11.2d, v5.2d \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v11.16b, v11.16b, v11.16b, #8 \n" + "PMULL v9.1q, v11.1d, v5.1d \n" + "PMULL2 v11.1q, v11.2d, v5.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v11.16b, v9.16b \n" +#else + "EOR v11.16b, v11.16b, v9.16b \n" + "EOR v2.16b, v2.16b, v11.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v8.1q, v10.1d, v6.1d \n" + "PMULL2 v9.1q, v10.2d, v6.2d \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v10.16b, v10.16b, v10.16b, #8 \n" + "PMULL v9.1q, v10.1d, v6.1d \n" + "PMULL2 v10.1q, v10.2d, v6.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v10.16b, v9.16b \n" +#else + "EOR v10.16b, v10.16b, v9.16b \n" + "EOR v2.16b, v2.16b, v10.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v9.16b, v0.16b, v1.16b, #8 \n" + "PMULL2 v8.1q, v1.2d, v7.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v9.16b, v9.16b, v2.16b, v8.16b \n" +#else + "EOR v9.16b, v9.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v9.16b, v9.16b, v8.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v8.1q, v9.2d, v7.2d \n" + "MOV v0.D[1], v9.D[0] \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "CMP x12, #64 \n" + "BGE 114b \n" + "CBZ x12, 120f \n" + "115: \n" + "CMP x12, #16 \n" + "BLT 112f \n" + "111: \n" + "LD1 {v14.2d}, [%[c]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v14.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "PMULL v10.1q, v0.1d, v3.1d \n" + "PMULL2 v11.1q, v0.2d, v3.2d \n" + "EXT v12.16b, v3.16b, v3.16b, #8 \n" + "PMULL v13.1q, v0.1d, v12.1d \n" + "PMULL2 v12.1q, v0.2d, v12.2d \n" + "EOR v12.16b, v12.16b, v13.16b \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "# Reduce \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v12.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v0.16b, v10.16b, v12.16b \n" + "CMP x12, #16 \n" + "BGE 111b \n" + "CBZ x12, 120f \n" + "112: \n" + "# Partial cipher text \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "MOV x14, x12 \n" + "ST1 {v14.2d}, [%[scratch]] \n" + "113: \n" + "LDRB w13, [%[c]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 113b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v14.2d}, [%[scratch]] \n" + "RBIT v14.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "PMULL v10.1q, v0.1d, v3.1d \n" + "PMULL2 v11.1q, v0.2d, v3.2d \n" + "EXT v12.16b, v3.16b, v3.16b, #8 \n" + "PMULL v13.1q, v0.1d, v12.1d \n" + "PMULL2 v12.1q, v0.2d, v12.2d \n" + "EOR v12.16b, v12.16b, v13.16b \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "# Reduce \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v12.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v0.16b, v10.16b, v12.16b \n" + "120: \n" + "RBIT v0.16b, v0.16b \n" + "LSL %x[aSz], %x[aSz], #3 \n" + "LSL 
%x[cSz], %x[cSz], #3 \n" + "MOV v10.D[0], %x[aSz] \n" + "MOV v10.D[1], %x[cSz] \n" + "REV64 v10.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v10.16b \n" + "ST1 {v0.16b}, [%[scratch]] \n" + : [cSz] "+r" (cSz), [c] "+r" (c), [aSz] "+r" (aSz), [a] "+r" (a) + : [scratch] "r" (scratch), [h] "m" (gcm->H) + : "cc", "memory", "w12", "w13", "x14", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14" + ); -/* Calculates authentication tag for AES GCM. C implementation. - * - * @param [in, out] aes AES object. - * @param [out] authTag Buffer to store authentication tag in. - * @param [in] authTagSz Length of tag to create. - */ -void AES_GCM_final_AARCH64(Aes* aes, byte* authTag, word32 authTagSz) -{ - /* Calculate authentication tag. */ - GHASH_FINAL_AARCH64(aes, authTag, authTagSz); - /* XOR in as much of encrypted counter as is required. */ - xorbuf(authTag, AES_INITCTR(aes), authTagSz); -#ifdef OPENSSL_EXTRA - /* store AAD size for next call */ - aes->gcm.aadLen = aes->aSz; -#endif - /* Zeroize last block to protect sensitive data. */ - ForceZero(AES_LASTBLOCK(aes), AES_BLOCK_SIZE); + XMEMCPY(s, scratch, sSz); } -#endif /* WOLFSSL_AESGCM_STREAM */ #ifdef WOLFSSL_AES_128 /* internal function : see wc_AesGcmEncrypt */ -static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, +static int Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { @@ -2070,8 +1924,8 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH_AARCH64(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT_AARCH64(counter, aes->gcm.H); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } __asm__ __volatile__ ( @@ -3689,11 +3543,14 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); + + + return 0; } #endif /* WOLFSSL_AES_128 */ #ifdef WOLFSSL_AES_192 /* internal function : see wc_AesGcmEncrypt */ -static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, +static int Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { @@ -3713,8 +3570,8 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH_AARCH64(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT_AARCH64(counter, aes->gcm.H); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } __asm__ __volatile__ ( @@ -5449,11 +5306,14 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); + + + return 0; } #endif /* WOLFSSL_AES_192 */ #ifdef WOLFSSL_AES_256 /* internal function : see wc_AesGcmEncrypt */ -static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, +static int Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { @@ -5473,8 +5333,8 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, 
counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH_AARCH64(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT_AARCH64(counter, aes->gcm.H); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } __asm__ __volatile__ ( @@ -7340,6 +7200,9 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); + + + return 0; } #endif /* WOLFSSL_AES_256 */ @@ -7364,29 +7227,41 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, * by Conrado P.L. Gouvea and Julio Lopez reduction on 256bit value using * Algorithm 5 */ -void AES_GCM_encrypt_AARCH64(Aes* aes, byte* out, const byte* in, word32 sz, +int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { + /* sanity checks */ + if ((aes == NULL) || (iv == NULL && ivSz > 0) || (authTag == NULL) || + ((authIn == NULL) && (authInSz > 0)) || (ivSz == 0)) { + WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); + return BAD_FUNC_ARG; + } + + if ((authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) || (authTagSz > AES_BLOCK_SIZE)) { + WOLFSSL_MSG("GcmEncrypt authTagSz error"); + return BAD_FUNC_ARG; + } + switch (aes->rounds) { #ifdef WOLFSSL_AES_128 case 10: - Aes128GcmEncrypt(aes, out, in, sz, iv, ivSz, authTag, authTagSz, - authIn, authInSz); - break; + return Aes128GcmEncrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); #endif #ifdef WOLFSSL_AES_192 case 12: - Aes192GcmEncrypt(aes, out, in, sz, iv, ivSz, authTag, authTagSz, - authIn, authInSz); - break; + return Aes192GcmEncrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); #endif #ifdef WOLFSSL_AES_256 case 14: - Aes256GcmEncrypt(aes, out, in, sz, iv, ivSz, authTag, authTagSz, - authIn, authInSz); - break; + return Aes256GcmEncrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); #endif + default: + WOLFSSL_MSG("AES-GCM invalid round number"); + return BAD_FUNC_ARG; } } @@ -7409,8 +7284,8 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH_AARCH64(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT_AARCH64(counter, aes->gcm.H); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } __asm__ __volatile__ ( @@ -9060,8 +8935,8 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH_AARCH64(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT_AARCH64(counter, aes->gcm.H); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } __asm__ __volatile__ ( @@ -10828,8 +10703,8 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH_AARCH64(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT_AARCH64(counter, aes->gcm.H); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } __asm__ __volatile__ ( @@ -12712,30 +12587,38 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, * authIn: additional data buffer * authInSz: size of additional data buffer */ -int AES_GCM_decrypt_AARCH64(Aes* aes, byte* out, const byte* in, word32 sz, +int 
wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { /* sanity checks */ + if ((aes == NULL) || (iv == NULL) || (authTag == NULL) || + (authTagSz > AES_BLOCK_SIZE) || (authTagSz == 0) || (ivSz == 0) || + ((sz != 0) && ((in == NULL) || (out == NULL)))) { + WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); + return BAD_FUNC_ARG; + } + switch (aes->rounds) { #ifdef WOLFSSL_AES_128 case 10: - return Aes128GcmDecrypt(aes, out, in, sz, iv, ivSz, authTag, - authTagSz, authIn, authInSz); + return Aes128GcmDecrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); #endif #ifdef WOLFSSL_AES_192 case 12: - return Aes192GcmDecrypt(aes, out, in, sz, iv, ivSz, authTag, - authTagSz, authIn, authInSz); + return Aes192GcmDecrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); #endif #ifdef WOLFSSL_AES_256 case 14: - return Aes256GcmDecrypt(aes, out, in, sz, iv, ivSz, authTag, - authTagSz, authIn, authInSz); + return Aes256GcmDecrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); #endif + default: + WOLFSSL_MSG("AES-GCM invalid round number"); + return BAD_FUNC_ARG; } - - return BAD_FUNC_ARG; } #endif /* HAVE_AES_DECRYPT */ @@ -14296,7 +14179,6 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, while (blocks--) { IncrementGcmCounter(ctr); wc_AesEncrypt(aes, ctr, scratch); -#endif xorbuf(scratch, c, AES_BLOCK_SIZE); XMEMCPY(p, scratch, AES_BLOCK_SIZE); p += AES_BLOCK_SIZE; @@ -14319,9 +14201,10 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, #endif /* HAVE_AES_DECRYPT */ #endif /* HAVE_AESGCM */ +#endif /* aarch64 */ + #ifdef HAVE_AESGCM #ifdef WOLFSSL_AESGCM_STREAM -#ifndef __aarch64__ /* Access initialization counter data. */ #define AES_INITCTR(aes) ((aes)->streamData + 0 * AES_BLOCK_SIZE) /* Access counter data. */ @@ -14539,13 +14422,8 @@ static void AesGcmInit_C(Aes* aes, const byte* iv, word32 ivSz) word32 aadTemp = aes->gcm.aadLen; aes->gcm.aadLen = 0; #endif - #ifdef __aarch64__ - GHASH_AARCH64(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT_AARCH64(counter, aes->gcm.H); - #else GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); GMULT(counter, aes->gcm.H); - #endif #ifdef OPENSSL_EXTRA aes->gcm.aadLen = aadTemp; #endif @@ -14554,8 +14432,7 @@ static void AesGcmInit_C(Aes* aes, const byte* iv, word32 ivSz) /* Copy in the counter for use with cipher. */ XMEMCPY(AES_COUNTER(aes), counter, AES_BLOCK_SIZE); /* Encrypt initial counter into a buffer for GCM. */ - AES_encrypt_AARCH64(counter, AES_INITCTR(aes), (byte*)aes->key, - aes->rounds); + wc_AesEncrypt(aes, counter, AES_INITCTR(aes)); /* Reset state fields. */ aes->over = 0; aes->aSz = 0; @@ -14603,8 +14480,7 @@ static void AesGcmCryptUpdate_C(Aes* aes, byte* out, const byte* in, word32 sz) ALIGN32 byte scratch[AES_BLOCK_SIZE]; IncrementGcmCounter(AES_COUNTER(aes)); /* Encrypt counter into a buffer. */ - AES_encrypt_AARCH64(AES_COUNTER(aes), scratch, (byte*)aes->key, - aes->rounds); + wc_AesEncrypt(aes, AES_COUNTER(aes), scratch); /* XOR plain text into encrypted counter into cipher text buffer. */ xorbufout(out, scratch, in, AES_BLOCK_SIZE); /* Data complete. */ @@ -14616,8 +14492,7 @@ static void AesGcmCryptUpdate_C(Aes* aes, byte* out, const byte* in, word32 sz) /* Generate an extra block and use up as much as needed. 
*/ IncrementGcmCounter(AES_COUNTER(aes)); /* Encrypt counter into cache. */ - AES_encrypt_AARCH64(AES_COUNTER(aes), AES_LASTBLOCK(aes), - (byte*)aes->key, (int)aes->rounds); + wc_AesEncrypt(aes, AES_COUNTER(aes), AES_LASTBLOCK(aes)); /* XOR plain text into encrypted counter into cipher text buffer. */ xorbufout(out, AES_LASTBLOCK(aes), in, partial); /* Keep amount of encrypted block used. */ @@ -14961,13 +14836,11 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz) return ret; } #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ -#endif /* !__aarch64__ */ #endif /* WOLFSSL_AESGCM_STREAM */ #endif /* HAVE_AESGCM */ #ifdef HAVE_AESCCM -#ifndef __aarch64__ /* Software version of AES-CCM from wolfcrypt/src/aes.c * Gets some speed up from hardware acceleration of wc_AesEncrypt */ @@ -15237,30 +15110,11 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, return result; } #endif /* HAVE_AES_DECRYPT */ -#endif /* !__aarch64__ */ #endif /* HAVE_AESCCM */ #ifdef HAVE_AESGCM /* common GCM functions 32 and 64 bit */ -#if defined(__aarch64__) -void AES_GCM_set_key_AARCH64(Aes* aes, byte* iv) -{ - - AES_encrypt_AARCH64(iv, aes->gcm.H, (byte*)aes->key, aes->rounds); - { - word32* pt = (word32*)aes->gcm.H; - __asm__ volatile ( - "LD1 {v0.16b}, [%[h]] \n" - "RBIT v0.16b, v0.16b \n" - "ST1 {v0.16b}, [%[out]] \n" - : [out] "=r" (pt) - : [h] "0" (pt) - : "cc", "memory", "v0" - ); - } -} -#else int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) { int ret; @@ -15278,6 +15132,19 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) #endif wc_AesEncrypt(aes, iv, aes->gcm.H); + #if defined(__aarch64__) + { + word32* pt = (word32*)aes->gcm.H; + __asm__ volatile ( + "LD1 {v0.16b}, [%[h]] \n" + "RBIT v0.16b, v0.16b \n" + "ST1 {v0.16b}, [%[out]] \n" + : [out] "=r" (pt) + : [h] "0" (pt) + : "cc", "memory", "v0" + ); + } + #else { word32* pt = (word32*)aes->gcm.H; __asm__ volatile ( @@ -15290,15 +15157,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) : "cc", "memory", "q0" ); } + #endif } return ret; } -#endif #endif /* HAVE_AESGCM */ -#ifndef __aarch64__ /* AES-DIRECT */ #if defined(WOLFSSL_AES_DIRECT) /* Allow direct access to one block encrypt */ @@ -15322,7 +15188,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) } #endif /* HAVE_AES_DECRYPT */ #endif /* WOLFSSL_AES_DIRECT */ -#endif /* !__aarch64__ */ #ifdef WOLFSSL_AES_XTS @@ -15506,12 +15371,26 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) * * returns 0 on success */ -void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, - const byte* i) +int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) { + int ret = 0; word32 blocks = (sz / AES_BLOCK_SIZE); byte tmp[AES_BLOCK_SIZE]; + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks == 0) { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + __asm__ __volatile__ ( "MOV x19, 0x87 \n" @@ -15812,6 +15691,8 @@ void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23" ); + + return ret; } /* Same process as encryption but Aes key is AES_DECRYPTION type. 
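Before the decrypt path, a quick usage sketch of the encrypt entry point restored above. It assumes the usual wolfSSL XTS lifecycle calls (wc_AesXtsSetKey()/wc_AesXtsFree(), which are not part of this hunk); the key buffer is twice the AES key size (cipher key followed by tweak key), and per the new argument checks the tweak must be at least AES_BLOCK_SIZE bytes and the input at least one full block.

#include <wolfssl/wolfcrypt/aes.h>

/* Minimal sketch: encrypt one XTS-AES-128 data unit.
 * xts_encrypt_unit is an illustrative helper, not part of this patch. */
static int xts_encrypt_unit(byte* out, const byte* in, word32 sz,
                            const byte key[32],
                            const byte tweak[AES_BLOCK_SIZE])
{
    XtsAes xts;
    int ret = wc_AesXtsSetKey(&xts, key, 32, AES_ENCRYPTION, NULL,
                              INVALID_DEVID);
    if (ret == 0) {
        /* sz must be >= AES_BLOCK_SIZE; a trailing partial block is
         * handled by ciphertext stealing inside the assembly. */
        ret = wc_AesXtsEncrypt(&xts, out, in, sz, tweak, AES_BLOCK_SIZE);
        wc_AesXtsFree(&xts);
    }
    return ret;
}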
@@ -15826,13 +15707,27 @@ void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, * * returns 0 on success */ -void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, - const byte* i) +int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) { + int ret = 0; word32 blocks = (sz / AES_BLOCK_SIZE); byte tmp[AES_BLOCK_SIZE]; byte stl = (sz % AES_BLOCK_SIZE); + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks == 0) { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + /* if Stealing then break out of loop one block early to handle special * case */ blocks -= (stl > 0); @@ -16144,6 +16039,8 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23" ); + + return ret; } #else @@ -16659,7 +16556,6 @@ extern void GCM_gmult_len(byte* x, /* const */ byte m[32][AES_BLOCK_SIZE], extern void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); -#ifndef __aarch64__ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { @@ -17248,22 +17144,9 @@ static WC_INLINE void RIGHTSHIFTX(byte* x) } #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) - -#if defined(__aarch64__) && !defined(BIG_ENDIAN_ORDER) -static WC_INLINE void Shift4_M0(byte *r8, byte *z8) -{ - int i; - for (i = 15; i > 0; i--) - r8[i] = (byte)(z8[i-1] << 4) | (byte)(z8[i] >> 4); - r8[0] = (byte)(z8[0] >> 4); -} -#endif - void GenerateM0(Gcm* gcm) { -#if !defined(__aarch64__) || !defined(BIG_ENDIAN_ORDER) int i; -#endif byte (*m)[AES_BLOCK_SIZE] = gcm->M0; /* 0 times -> 0x0 */ @@ -17308,7 +17191,6 @@ void GenerateM0(Gcm* gcm) XMEMCPY(m[0xf], m[0x8], AES_BLOCK_SIZE); xorbuf (m[0xf], m[0x7], AES_BLOCK_SIZE); -#ifndef __aarch64__ for (i = 0; i < 16; i++) { word32* m32 = (word32*)gcm->M0[i]; m32[0] = ByteReverseWord32(m32[0]); @@ -17316,11 +17198,6 @@ void GenerateM0(Gcm* gcm) m32[2] = ByteReverseWord32(m32[2]); m32[3] = ByteReverseWord32(m32[3]); } -#elif !defined(BIG_ENDIAN_ORDER) - for (i = 0; i < 16; i++) { - Shift4_M0(m[16+i], m[i]); - } -#endif } #endif /* GCM_TABLE */ @@ -17358,7 +17235,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) return ret; } -#ifndef __aarch64__ static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) { int i; @@ -17369,7 +17245,6 @@ static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) return; } } -#endif static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) { @@ -17686,7 +17561,6 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, return 0; } #endif /* HAVE_AESGCM */ -#endif /* !__aarch64__ */ #endif /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */ #endif /* !NO_AES && WOLFSSL_ARMASM */ diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c index 9d5dc25609..dabe7af9c3 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256.c +++ b/wolfcrypt/src/port/arm/armv8-sha256.c @@ -1407,214 +1407,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) return ret; } -#elif defined(__aarch64__) - - static const FLASH_QUALIFIER ALIGN32 word32 K[64] = { - 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL, - 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L, 
- 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, - 0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL, - 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L, - 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L, - 0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL, - 0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L, - 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L, - 0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L, - 0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL, - 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L, - 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L - }; - -/* Both versions of Ch and Maj are logically the same, but with the second set - the compilers can recognize them better for optimization */ -#ifdef WOLFSSL_SHA256_BY_SPEC - /* SHA256 math based on specification */ - #define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) - #define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y))) -#else - /* SHA256 math reworked for easier compiler optimization */ - #define Ch(x,y,z) ((((y) ^ (z)) & (x)) ^ (z)) - #define Maj(x,y,z) ((((x) ^ (y)) & ((y) ^ (z))) ^ (y)) -#endif - #define R(x, n) (((x) & 0xFFFFFFFFU) >> (n)) - - #define S(x, n) rotrFixed(x, n) - #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) - #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) - #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) - #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) - - #define a(i) S[(0-(i)) & 7] - #define b(i) S[(1-(i)) & 7] - #define c(i) S[(2-(i)) & 7] - #define d(i) S[(3-(i)) & 7] - #define e(i) S[(4-(i)) & 7] - #define f(i) S[(5-(i)) & 7] - #define g(i) S[(6-(i)) & 7] - #define h(i) S[(7-(i)) & 7] - - #ifndef XTRANSFORM - #define XTRANSFORM(S, D) Transform_Sha256((S),(D)) - #endif - -#ifndef SHA256_MANY_REGISTERS - #define RND(j) \ - t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+(j)] + \ - W[i+(j)]; \ - t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \ - d(j) += t0; \ - h(j) = t0 + t1 - - static void Transform_Sha256(wc_Sha256* sha256, const byte* data) - { - word32 S[8], t0, t1; - int i; - - #ifdef WOLFSSL_SMALL_STACK_CACHE - word32* W = sha256->W; - if (W == NULL) { - W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL, - DYNAMIC_TYPE_DIGEST); - if (W == NULL) - return MEMORY_E; - sha256->W = W; - } - #elif defined(WOLFSSL_SMALL_STACK) - word32* W; - W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (W == NULL) - return MEMORY_E; - #else - word32 W[WC_SHA256_BLOCK_SIZE]; - #endif - - /* Copy context->state[] to working vars */ - for (i = 0; i < 8; i++) - S[i] = sha256->digest[i]; - - for (i = 0; i < 16; i++) - W[i] = *((const word32*)&data[i*(int)sizeof(word32)]); - - for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++) - W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16]; - - #ifdef USE_SLOW_SHA256 - /* not unrolled - ~2k smaller and ~25% slower */ - for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) { - int j; - for (j = 0; j < 8; j++) { /* braces needed here for macros {} */ - RND(j); - } - } - #else - /* partially loop unrolled */ - for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) { - RND(0); RND(1); RND(2); RND(3); - RND(4); RND(5); RND(6); RND(7); - } - #endif /* USE_SLOW_SHA256 */ - - /* Add the working vars back into digest state[] */ - for (i = 0; i < 8; i++) { - sha256->digest[i] += S[i]; - } - - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SMALL_STACK_CACHE) - ForceZero(W, 
sizeof(word32) * WC_SHA256_BLOCK_SIZE); - XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER); - #endif - } -#else - /* SHA256 version that keeps all data in registers */ - #define SCHED1(j) (W[j] = *((word32*)&data[j*sizeof(word32)])) - #define SCHED(j) ( \ - W[ j & 15] += \ - Gamma1(W[(j-2) & 15])+ \ - W[(j-7) & 15] + \ - Gamma0(W[(j-15) & 15]) \ - ) - - #define RND1(j) \ - t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + SCHED1(j); \ - t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \ - d(j) += t0; \ - h(j) = t0 + t1 - #define RNDN(j) \ - t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + SCHED(j); \ - t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \ - d(j) += t0; \ - h(j) = t0 + t1 - - static void Transform_Sha256(wc_Sha256* sha256, const byte* data) - { - word32 S[8], t0, t1; - int i; - #ifdef USE_SLOW_SHA256 - int j; - #endif - word32 W[WC_SHA256_BLOCK_SIZE/sizeof(word32)]; - - /* Copy digest to working vars */ - S[0] = sha256->digest[0]; - S[1] = sha256->digest[1]; - S[2] = sha256->digest[2]; - S[3] = sha256->digest[3]; - S[4] = sha256->digest[4]; - S[5] = sha256->digest[5]; - S[6] = sha256->digest[6]; - S[7] = sha256->digest[7]; - - i = 0; - #ifdef USE_SLOW_SHA256 - for (j = 0; j < 16; j++) { - RND1(j); - } - for (i = 16; i < 64; i += 16) { - for (j = 0; j < 16; j++) { - RNDN(j); - } - } - #else - RND1( 0); RND1( 1); RND1( 2); RND1( 3); - RND1( 4); RND1( 5); RND1( 6); RND1( 7); - RND1( 8); RND1( 9); RND1(10); RND1(11); - RND1(12); RND1(13); RND1(14); RND1(15); - /* 64 operations, partially loop unrolled */ - for (i = 16; i < 64; i += 16) { - RNDN( 0); RNDN( 1); RNDN( 2); RNDN( 3); - RNDN( 4); RNDN( 5); RNDN( 6); RNDN( 7); - RNDN( 8); RNDN( 9); RNDN(10); RNDN(11); - RNDN(12); RNDN(13); RNDN(14); RNDN(15); - } - #endif - - /* Add the working vars back into digest */ - sha256->digest[0] += S[0]; - sha256->digest[1] += S[1]; - sha256->digest[2] += S[2]; - sha256->digest[3] += S[3]; - sha256->digest[4] += S[4]; - sha256->digest[5] += S[5]; - sha256->digest[6] += S[6]; - sha256->digest[7] += S[7]; - } -#endif /* SHA256_MANY_REGISTERS */ - -static void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, - word32 len) -{ - while (len > 0) { - byte tmp[WC_SHA256_BLOCK_SIZE]; - ByteReverseWords((word32*)tmp, (const word32*)data, - WC_SHA256_BLOCK_SIZE); - Transform_Sha256(sha256, tmp); - data += WC_SHA256_BLOCK_SIZE; - len -= WC_SHA256_BLOCK_SIZE; - } -} - -#else +#else /* */ extern void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len); diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index ab2159abf3..61a3433ea9 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -61,7 +61,7 @@ typedef struct Gcm { #endif WOLFSSL_LOCAL void GenerateM0(Gcm* gcm); -#if !defined(__aarch64__) && defined(WOLFSSL_ARMASM) +#ifdef WOLFSSL_ARMASM WOLFSSL_LOCAL void GMULT(byte* X, byte* Y); #endif WOLFSSL_LOCAL void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, @@ -304,13 +304,6 @@ struct Aes { #ifdef WOLFSSL_AESNI byte use_aesni; #endif /* WOLFSSL_AESNI */ -#if defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - byte use_aes_hw_crypto; -#ifdef HAVE_AESGCM - byte use_pmull_hw_crypto; -#endif -#endif /* __aarch64__ && WOLFSSL_ARMASM && !WOLFSSL_ARMASM_NO_HW_CRYPTO */ #ifdef WOLF_CRYPTO_CB int devId; void* devCtx; @@ -839,59 +832,6 @@ WOLFSSL_API int wc_AesEaxFree(AesEax* eax); #endif /* WOLFSSL_AES_EAX */ -#if defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) -/* 
GHASH one block of data. - * - * XOR block into tag and GMULT with H. - * - * @param [in, out] aes AES GCM object. - * @param [in] block Block of AAD or cipher text. - */ -#define GHASH_ONE_BLOCK(aes, block) \ - do { \ - xorbuf(AES_TAG(aes), block, AES_BLOCK_SIZE); \ - GMULT_AARCH64(AES_TAG(aes), aes->gcm.H); \ - } \ - while (0) - -WOLFSSL_LOCAL int AES_set_key_AARCH64(const unsigned char *userKey, - const int keylen, Aes* aes, int dir); -WOLFSSL_LOCAL void AES_encrypt_AARCH64(const byte* inBlock, byte* outBlock, - byte* key, int nr); -WOLFSSL_LOCAL void AES_decrypt_AARCH64(const byte* inBlock, byte* outBlock, - byte* key, int nr); -WOLFSSL_LOCAL void AES_CBC_encrypt_AARCH64(const byte* in, byte* out, word32 sz, - byte* reg, byte* key, int rounds); -WOLFSSL_LOCAL void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, - byte* reg, byte* key, int rounds); -WOLFSSL_LOCAL void AES_CTR_encrypt_AARCH64(Aes* aes, byte* out, const byte* in, - word32 sz); -WOLFSSL_LOCAL void GMULT_AARCH64(byte* X, byte* Y); -#ifdef WOLFSSL_AESGCM_STREAM -WOLFSSL_LOCAL void GHASH_UPDATE_AARCH64(Aes* aes, const byte* a, word32 aSz, - const byte* c, word32 cSz); -WOLFSSL_LOCAL void AES_GCM_init_AARCH64(Aes* aes, const byte* iv, word32 ivSz); -WOLFSSL_LOCAL void AES_GCM_crypt_update_AARCH64(Aes* aes, byte* out, - const byte* in, word32 sz); -WOLFSSL_LOCAL void AES_GCM_final_AARCH64(Aes* aes, byte* authTag, - word32 authTagSz); -#endif -WOLFSSL_LOCAL void AES_GCM_set_key_AARCH64(Aes* aes, byte* iv); -WOLFSSL_LOCAL void AES_GCM_encrypt_AARCH64(Aes* aes, byte* out, const byte* in, - word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz); -WOLFSSL_LOCAL int AES_GCM_decrypt_AARCH64(Aes* aes, byte* out, const byte* in, - word32 sz, const byte* iv, word32 ivSz, const byte* authTag, - word32 authTagSz, const byte* authIn, word32 authInSz); - -#ifdef WOLFSSL_AES_XTS -WOLFSSL_LOCAL void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, - const byte* in, word32 sz, const byte* i); -WOLFSSL_LOCAL void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, - const byte* in, word32 sz, const byte* i); -#endif /* WOLFSSL_AES_XTS */ -#endif /* __aarch64__ && WOLFSSL_ARMASM && !WOLFSSL_ARMASM_NO_HW_CRYPTO */ #ifdef __cplusplus } /* extern "C" */ diff --git a/wolfssl/wolfcrypt/cpuid.h b/wolfssl/wolfcrypt/cpuid.h index b7a5714798..c91b628b5b 100644 --- a/wolfssl/wolfcrypt/cpuid.h +++ b/wolfssl/wolfcrypt/cpuid.h @@ -38,11 +38,6 @@ #define HAVE_CPUID #define HAVE_CPUID_INTEL #endif -#if (defined(WOLFSSL_AARCH64_BUILD) || (defined(__aarch64__) && \ - defined(WOLFSSL_ARMASM))) && !defined(WOLFSSL_NO_ASM) - #define HAVE_CPUID - #define HAVE_CPUID_AARCH64 -#endif #ifdef HAVE_CPUID_INTEL @@ -68,26 +63,6 @@ #define IS_INTEL_BMI1(f) ((f) & CPUID_BMI1) #define IS_INTEL_SHA(f) ((f) & CPUID_SHA) -#elif defined(HAVE_CPUID_AARCH64) - - #define CPUID_AES 0x0001 - #define CPUID_PMULL 0x0002 - #define CPUID_SHA256 0x0004 - #define CPUID_SHA512 0x0008 - #define CPUID_RDM 0x0010 - #define CPUID_SHA3 0x0020 - #define CPUID_SM3 0x0040 - #define CPUID_SM4 0x0080 - - #define IS_AARCH64_AES(f) ((f) & CPUID_AES) - #define IS_AARCH64_PMULL(f) ((f) & CPUID_PMULL) - #define IS_AARCH64_SHA256(f) ((f) & CPUID_SHA256) - #define IS_AARCH64_SHA512(f) ((f) & CPUID_SHA512) - #define IS_AARCH64_RDM(f) ((f) & CPUID_RDM) - #define IS_AARCH64_SHA3(f) ((f) & CPUID_SHA3) - #define IS_AARCH64_SM3(f) ((f) & CPUID_SM3) - #define IS_AARCH64_SM4(f) ((f) & CPUID_SM4) - #endif #ifdef HAVE_CPUID
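Taken together, the aes.c changes above reinstate the one-shot GCM entry points with their argument checking. A minimal round-trip sketch of how those restored wrappers are driven follows; wc_AesInit()/wc_AesFree() are the standard wolfCrypt lifecycle calls rather than part of this patch, a 12-byte IV takes the GCM_NONCE_MID_SZ fast path, and a tag mismatch in decrypt surfaces as a non-zero return code.

#include <wolfssl/wolfcrypt/aes.h>

/* Illustrative AES-128-GCM encrypt/decrypt round trip using the
 * wrappers restored by this revert. */
static int gcm_roundtrip(const byte key[16], const byte iv[12],
                         const byte* aad, word32 aadSz,
                         const byte* msg, byte* ct, byte* pt, word32 sz)
{
    Aes aes;
    byte tag[AES_BLOCK_SIZE];
    int ret = wc_AesInit(&aes, NULL, INVALID_DEVID);

    if (ret == 0)
        ret = wc_AesGcmSetKey(&aes, key, 16);   /* also derives H */
    if (ret == 0)
        ret = wc_AesGcmEncrypt(&aes, ct, msg, sz, iv, 12,
                               tag, sizeof(tag), aad, aadSz);
    if (ret == 0)                               /* verifies the tag too */
        ret = wc_AesGcmDecrypt(&aes, pt, ct, sz, iv, 12,
                               tag, sizeof(tag), aad, aadSz);
    wc_AesFree(&aes);
    return ret;
}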