diff --git a/crypto/fipsmodule/aes/aes_nohw.c b/crypto/fipsmodule/aes/aes_nohw.c
index 308c7cca1c..a86f468cdf 100644
--- a/crypto/fipsmodule/aes/aes_nohw.c
+++ b/crypto/fipsmodule/aes/aes_nohw.c
@@ -912,29 +912,27 @@ void aes_nohw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
   aes_nohw_expand_round_keys(&sched, key);
 
   // Make |AES_NOHW_BATCH_SIZE| copies of |ivec|.
-  alignas(AES_NOHW_WORD_SIZE) union {
-    uint32_t u32[AES_NOHW_BATCH_SIZE * 4];
-    uint8_t u8[AES_NOHW_BATCH_SIZE * 16];
-  } ivs, enc_ivs;
+  alignas(AES_NOHW_WORD_SIZE) uint8_t ivs[AES_NOHW_BATCH_SIZE * 16];
+  alignas(AES_NOHW_WORD_SIZE) uint8_t enc_ivs[AES_NOHW_BATCH_SIZE * 16];
   for (size_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) {
-    OPENSSL_memcpy(ivs.u8 + 16 * i, ivec, 16);
+    OPENSSL_memcpy(ivs + 16 * i, ivec, 16);
   }
 
-  uint32_t ctr = CRYPTO_bswap4(ivs.u32[3]);
+  uint32_t ctr = CRYPTO_load_u32_be(ivs + 12);
   for (;;) {
     // Update counters.
     for (uint32_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) {
-      ivs.u32[4 * i + 3] = CRYPTO_bswap4(ctr + i);
+      CRYPTO_store_u32_be(ivs + 16 * i + 12, ctr + i);
     }
 
     size_t todo = blocks >= AES_NOHW_BATCH_SIZE ? AES_NOHW_BATCH_SIZE : blocks;
     AES_NOHW_BATCH batch;
-    aes_nohw_to_batch(&batch, ivs.u8, todo);
+    aes_nohw_to_batch(&batch, ivs, todo);
     aes_nohw_encrypt_batch(&sched, key->rounds, &batch);
-    aes_nohw_from_batch(enc_ivs.u8, todo, &batch);
+    aes_nohw_from_batch(enc_ivs, todo, &batch);
     for (size_t i = 0; i < todo; i++) {
-      aes_nohw_xor_block(out + 16 * i, in + 16 * i, enc_ivs.u8 + 16 * i);
+      aes_nohw_xor_block(out + 16 * i, in + 16 * i, enc_ivs + 16 * i);
     }
 
     blocks -= todo;
diff --git a/crypto/internal.h b/crypto/internal.h
index dba229c399..cb2f703cc4 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -264,6 +264,10 @@ static inline crypto_word constant_time_select_w(crypto_word mask,
 static inline uint32_t CRYPTO_bswap4(uint32_t x) {
   return __builtin_bswap32(x);
 }
+
+static inline uint64_t CRYPTO_bswap8(uint64_t x) {
+  return __builtin_bswap64(x);
+}
 #elif defined(_MSC_VER)
 #pragma warning(push, 3)
 #include <stdlib.h>
@@ -272,6 +276,10 @@ static inline uint32_t CRYPTO_bswap4(uint32_t x) {
 static inline uint32_t CRYPTO_bswap4(uint32_t x) {
   return _byteswap_ulong(x);
 }
+
+static inline uint64_t CRYPTO_bswap8(uint64_t x) {
+  return _byteswap_uint64(x);
+}
 #endif
 
 #if !defined(RING_CORE_NOSTDLIBINC)
@@ -310,6 +318,65 @@ static inline void *OPENSSL_memset(void *dst, int c, size_t n) {
 }
 
 
+// Loads and stores.
+//
+// The following functions load and store sized integers with the specified
+// endianness. They use |memcpy|, and so avoid alignment or strict aliasing
+// requirements on the input and output pointers.
+
+static inline uint32_t CRYPTO_load_u32_le(const void *in) {
+  uint32_t v;
+  OPENSSL_memcpy(&v, in, sizeof(v));
+  return v;
+}
+
+static inline void CRYPTO_store_u32_le(void *out, uint32_t v) {
+  OPENSSL_memcpy(out, &v, sizeof(v));
+}
+
+static inline uint32_t CRYPTO_load_u32_be(const void *in) {
+  uint32_t v;
+  OPENSSL_memcpy(&v, in, sizeof(v));
+  return CRYPTO_bswap4(v);
+}
+
+static inline void CRYPTO_store_u32_be(void *out, uint32_t v) {
+  v = CRYPTO_bswap4(v);
+  OPENSSL_memcpy(out, &v, sizeof(v));
+}
+
+static inline uint64_t CRYPTO_load_u64_le(const void *in) {
+  uint64_t v;
+  OPENSSL_memcpy(&v, in, sizeof(v));
+  return v;
+}
+
+static inline void CRYPTO_store_u64_le(void *out, uint64_t v) {
+  OPENSSL_memcpy(out, &v, sizeof(v));
+}
+
+static inline uint64_t CRYPTO_load_u64_be(const void *ptr) {
+  uint64_t ret;
+  OPENSSL_memcpy(&ret, ptr, sizeof(ret));
+  return CRYPTO_bswap8(ret);
+}
+
+static inline void CRYPTO_store_u64_be(void *out, uint64_t v) {
+  v = CRYPTO_bswap8(v);
+  OPENSSL_memcpy(out, &v, sizeof(v));
+}
+
+static inline crypto_word CRYPTO_load_word_le(const void *in) {
+  crypto_word v;
+  OPENSSL_memcpy(&v, in, sizeof(v));
+  return v;
+}
+
+static inline void CRYPTO_store_word_le(void *out, crypto_word v) {
+  OPENSSL_memcpy(out, &v, sizeof(v));
+}
+
+
 // Runtime CPU feature support
 
 #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c
index 610a8c9512..08ae68bad4 100644
--- a/crypto/poly1305/poly1305_vec.c
+++ b/crypto/poly1305/poly1305_vec.c
@@ -31,22 +31,6 @@
 
 #include <emmintrin.h>
 
-static uint32_t load_u32_le(const uint8_t in[4]) {
-  uint32_t ret;
-  OPENSSL_memcpy(&ret, in, 4);
-  return ret;
-}
-
-static uint64_t load_u64_le(const uint8_t in[8]) {
-  uint64_t ret;
-  OPENSSL_memcpy(&ret, in, 8);
-  return ret;
-}
-
-static void store_u64_le(uint8_t out[8], uint64_t v) {
-  OPENSSL_memcpy(out, &v, 8);
-}
-
 typedef __m128i xmmi;
 
 static const alignas(16) uint32_t poly1305_x64_sse2_message_mask[4] = {
@@ -117,8 +101,8 @@ void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) {
   uint64_t t0, t1;
 
   // clamp key
-  t0 = load_u64_le(key + 0);
-  t1 = load_u64_le(key + 8);
+  t0 = CRYPTO_load_u64_le(key + 0);
+  t1 = CRYPTO_load_u64_le(key + 8);
   r0 = t0 & 0xffc0fffffff;
   t0 >>= 44;
   t0 |= t1 << 20;
@@ -136,10 +120,10 @@ void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) {
   p->R22.d[3] = (uint32_t)(r2 >> 32);
 
   // store pad
-  p->R23.d[1] = load_u32_le(key + 16);
-  p->R23.d[3] = load_u32_le(key + 20);
-  p->R24.d[1] = load_u32_le(key + 24);
-  p->R24.d[3] = load_u32_le(key + 28);
+  p->R23.d[1] = CRYPTO_load_u32_le(key + 16);
+  p->R23.d[3] = CRYPTO_load_u32_le(key + 20);
+  p->R24.d[1] = CRYPTO_load_u32_le(key + 24);
+  p->R24.d[3] = CRYPTO_load_u32_le(key + 28);
 
   // H = 0
   st->H[0] = _mm_setzero_si128();
@@ -771,8 +755,8 @@ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
   }
 
 poly1305_donna_atleast16bytes:
-  t0 = load_u64_le(m + 0);
-  t1 = load_u64_le(m + 8);
+  t0 = CRYPTO_load_u64_le(m + 0);
+  t1 = CRYPTO_load_u64_le(m + 8);
   h0 += t0 & 0xfffffffffff;
   t0 = shr128_pair(t1, t0, 44);
   h1 += t0 & 0xfffffffffff;
@@ -811,8 +795,8 @@ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
   OPENSSL_memset(m + leftover, 0, 16 - leftover);
   leftover = 16;
 
-  t0 = load_u64_le(m + 0);
-  t1 = load_u64_le(m + 8);
+  t0 = CRYPTO_load_u64_le(m + 0);
+  t1 = CRYPTO_load_u64_le(m + 8);
   h0 += t0 & 0xfffffffffff;
   t0 = shr128_pair(t1, t0, 44);
   h1 += t0 & 0xfffffffffff;
@@ -858,8 +842,8 @@ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
   t1 = (t1 >> 24);
   h2 += (t1)+c;
 
-  store_u64_le(mac + 0, ((h0) | (h1 << 44)));
-  store_u64_le(mac + 8, ((h1 >> 20) | (h2 << 24)));
+  CRYPTO_store_u64_le(mac + 0, ((h0) | (h1 << 44)));
+  CRYPTO_store_u64_le(mac + 8, ((h1 >> 20) | (h2 << 24)));
 }
 
 #endif  // BORINGSSL_HAS_UINT128 && OPENSSL_X86_64
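
For reference, below is a minimal standalone sketch of the big-endian counter handling that the aes_nohw_ctr32_encrypt_blocks hunk switches to. The load_u32_be/store_u32_be helpers are local stand-ins for the new CRYPTO_load_u32_be/CRYPTO_store_u32_be functions, redefined only so the snippet compiles outside the tree; like the patched code, they assume a little-endian host with a GCC/Clang-style __builtin_bswap32. The batch size of 4 and the sample counter value are illustrative assumptions, not values taken from the patch.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Stand-ins for CRYPTO_load_u32_be/CRYPTO_store_u32_be: memcpy-based, so they
// place no alignment or strict-aliasing requirements on the pointer.
static uint32_t load_u32_be(const void *in) {
  uint32_t v;
  memcpy(&v, in, sizeof(v));
  return __builtin_bswap32(v);
}

static void store_u32_be(void *out, uint32_t v) {
  v = __builtin_bswap32(v);
  memcpy(out, &v, sizeof(v));
}

int main(void) {
  // Four 16-byte counter blocks, mirroring the AES_NOHW_BATCH_SIZE copies of
  // |ivec|; the batch size of 4 is an illustrative assumption.
  uint8_t ivec[16] = {0};
  ivec[14] = 0xff;
  ivec[15] = 0xfe;  // start near a byte boundary to show the carry
  uint8_t ivs[4 * 16];
  for (size_t i = 0; i < 4; i++) {
    memcpy(ivs + 16 * i, ivec, 16);
  }

  // Read the big-endian counter once, then write ctr, ctr+1, ... back into
  // the last four bytes of each block, as the patched loop does.
  uint32_t ctr = load_u32_be(ivs + 12);
  for (uint32_t i = 0; i < 4; i++) {
    store_u32_be(ivs + 16 * i + 12, ctr + i);
  }

  for (size_t i = 0; i < 4; i++) {
    printf("block %zu counter: 0x%08" PRIx32 "\n", i,
           load_u32_be(ivs + 16 * i + 12));
  }
  return 0;
}

Because the helpers go through memcpy, the counter bytes can live in a plain uint8_t array, which is what lets the patch drop the alignas'd u32/u8 union in aes_nohw_ctr32_encrypt_blocks.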