Skip to content

Commit

Permalink
Merge pull request #1650 from briansmith/b/merge-boringssl-10
Browse files Browse the repository at this point in the history
Merge BoringSSL through 4d7b383
  • Loading branch information
briansmith committed Sep 24, 2023
2 parents 6e9f1b7 + 65fb8b8 commit ad59665
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 38 deletions.
18 changes: 8 additions & 10 deletions crypto/fipsmodule/aes/aes_nohw.c
Original file line number Diff line number Diff line change
Expand Up @@ -912,29 +912,27 @@ void aes_nohw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
aes_nohw_expand_round_keys(&sched, key);

// Make |AES_NOHW_BATCH_SIZE| copies of |ivec|.
alignas(AES_NOHW_WORD_SIZE) union {
uint32_t u32[AES_NOHW_BATCH_SIZE * 4];
uint8_t u8[AES_NOHW_BATCH_SIZE * 16];
} ivs, enc_ivs;
alignas(AES_NOHW_WORD_SIZE) uint8_t ivs[AES_NOHW_BATCH_SIZE * 16];
alignas(AES_NOHW_WORD_SIZE) uint8_t enc_ivs[AES_NOHW_BATCH_SIZE * 16];
for (size_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) {
OPENSSL_memcpy(ivs.u8 + 16 * i, ivec, 16);
OPENSSL_memcpy(ivs + 16 * i, ivec, 16);
}

uint32_t ctr = CRYPTO_bswap4(ivs.u32[3]);
uint32_t ctr = CRYPTO_load_u32_be(ivs + 12);
for (;;) {
// Update counters.
for (uint32_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) {
ivs.u32[4 * i + 3] = CRYPTO_bswap4(ctr + i);
CRYPTO_store_u32_be(ivs + 16 * i + 12, ctr + i);
}

size_t todo = blocks >= AES_NOHW_BATCH_SIZE ? AES_NOHW_BATCH_SIZE : blocks;
AES_NOHW_BATCH batch;
aes_nohw_to_batch(&batch, ivs.u8, todo);
aes_nohw_to_batch(&batch, ivs, todo);
aes_nohw_encrypt_batch(&sched, key->rounds, &batch);
aes_nohw_from_batch(enc_ivs.u8, todo, &batch);
aes_nohw_from_batch(enc_ivs, todo, &batch);

for (size_t i = 0; i < todo; i++) {
aes_nohw_xor_block(out + 16 * i, in + 16 * i, enc_ivs.u8 + 16 * i);
aes_nohw_xor_block(out + 16 * i, in + 16 * i, enc_ivs + 16 * i);
}

blocks -= todo;
Expand Down
67 changes: 67 additions & 0 deletions crypto/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,10 @@ static inline crypto_word constant_time_select_w(crypto_word mask,
// CRYPTO_bswap4 returns |x| with the order of its four bytes reversed, using
// the compiler's 32-bit byte-swap builtin.
static inline uint32_t CRYPTO_bswap4(uint32_t x) {
  const uint32_t reversed = __builtin_bswap32(x);
  return reversed;
}

// CRYPTO_bswap8 returns |x| with the order of its eight bytes reversed, using
// the compiler's 64-bit byte-swap builtin.
static inline uint64_t CRYPTO_bswap8(uint64_t x) {
  const uint64_t reversed = __builtin_bswap64(x);
  return reversed;
}
#elif defined(_MSC_VER)
#pragma warning(push, 3)
#include <stdlib.h>
Expand All @@ -272,6 +276,10 @@ static inline uint32_t CRYPTO_bswap4(uint32_t x) {
// CRYPTO_bswap4 returns |x| with the order of its four bytes reversed. This is
// the MSVC flavour, implemented with the |_byteswap_ulong| intrinsic.
static inline uint32_t CRYPTO_bswap4(uint32_t x) {
  const uint32_t reversed = _byteswap_ulong(x);
  return reversed;
}

// CRYPTO_bswap8 returns |x| with the order of its eight bytes reversed. This
// is the MSVC flavour, implemented with the |_byteswap_uint64| intrinsic.
static inline uint64_t CRYPTO_bswap8(uint64_t x) {
  const uint64_t reversed = _byteswap_uint64(x);
  return reversed;
}
#endif

#if !defined(RING_CORE_NOSTDLIBINC)
Expand Down Expand Up @@ -310,6 +318,65 @@ static inline void *OPENSSL_memset(void *dst, int c, size_t n) {
}


// Loads and stores.
//
// The following functions load and store sized integers with the specified
// endianness. The fixed-width variants below assemble and split values one
// byte at a time. They never access memory through a wider integer type, so
// they place no alignment or strict aliasing requirements on the input and
// output pointers, and they produce the same result whatever the byte order
// of the host is.
//
// NOTE(review): GCC and Clang are expected to fold these byte-wise patterns
// into a single load or store (plus a byte swap where needed); confirm the
// generated code on other compilers if these end up on a hot path.

// CRYPTO_load_u32_le returns the 32-bit integer stored in little-endian order
// in the four bytes at |in|.
static inline uint32_t CRYPTO_load_u32_le(const void *in) {
  const uint8_t *bytes = (const uint8_t *)in;
  return (uint32_t)bytes[0] | ((uint32_t)bytes[1] << 8) |
         ((uint32_t)bytes[2] << 16) | ((uint32_t)bytes[3] << 24);
}

// CRYPTO_store_u32_le writes |v| in little-endian order to the four bytes at
// |out|.
static inline void CRYPTO_store_u32_le(void *out, uint32_t v) {
  uint8_t *bytes = (uint8_t *)out;
  bytes[0] = (uint8_t)v;
  bytes[1] = (uint8_t)(v >> 8);
  bytes[2] = (uint8_t)(v >> 16);
  bytes[3] = (uint8_t)(v >> 24);
}

// CRYPTO_load_u32_be returns the 32-bit integer stored in big-endian order in
// the four bytes at |in|.
static inline uint32_t CRYPTO_load_u32_be(const void *in) {
  const uint8_t *bytes = (const uint8_t *)in;
  return ((uint32_t)bytes[0] << 24) | ((uint32_t)bytes[1] << 16) |
         ((uint32_t)bytes[2] << 8) | (uint32_t)bytes[3];
}

// CRYPTO_store_u32_be writes |v| in big-endian order to the four bytes at
// |out|.
static inline void CRYPTO_store_u32_be(void *out, uint32_t v) {
  uint8_t *bytes = (uint8_t *)out;
  bytes[0] = (uint8_t)(v >> 24);
  bytes[1] = (uint8_t)(v >> 16);
  bytes[2] = (uint8_t)(v >> 8);
  bytes[3] = (uint8_t)v;
}

// CRYPTO_load_u64_le returns the 64-bit integer stored in little-endian order
// in the eight bytes at |in|.
static inline uint64_t CRYPTO_load_u64_le(const void *in) {
  const uint8_t *bytes = (const uint8_t *)in;
  return (uint64_t)bytes[0] | ((uint64_t)bytes[1] << 8) |
         ((uint64_t)bytes[2] << 16) | ((uint64_t)bytes[3] << 24) |
         ((uint64_t)bytes[4] << 32) | ((uint64_t)bytes[5] << 40) |
         ((uint64_t)bytes[6] << 48) | ((uint64_t)bytes[7] << 56);
}

// CRYPTO_store_u64_le writes |v| in little-endian order to the eight bytes at
// |out|.
static inline void CRYPTO_store_u64_le(void *out, uint64_t v) {
  uint8_t *bytes = (uint8_t *)out;
  bytes[0] = (uint8_t)v;
  bytes[1] = (uint8_t)(v >> 8);
  bytes[2] = (uint8_t)(v >> 16);
  bytes[3] = (uint8_t)(v >> 24);
  bytes[4] = (uint8_t)(v >> 32);
  bytes[5] = (uint8_t)(v >> 40);
  bytes[6] = (uint8_t)(v >> 48);
  bytes[7] = (uint8_t)(v >> 56);
}

// CRYPTO_load_u64_be returns the 64-bit integer stored in big-endian order in
// the eight bytes at |ptr|.
static inline uint64_t CRYPTO_load_u64_be(const void *ptr) {
  const uint8_t *bytes = (const uint8_t *)ptr;
  return ((uint64_t)bytes[0] << 56) | ((uint64_t)bytes[1] << 48) |
         ((uint64_t)bytes[2] << 40) | ((uint64_t)bytes[3] << 32) |
         ((uint64_t)bytes[4] << 24) | ((uint64_t)bytes[5] << 16) |
         ((uint64_t)bytes[6] << 8) | (uint64_t)bytes[7];
}

// CRYPTO_store_u64_be writes |v| in big-endian order to the eight bytes at
// |out|.
static inline void CRYPTO_store_u64_be(void *out, uint64_t v) {
  uint8_t *bytes = (uint8_t *)out;
  bytes[0] = (uint8_t)(v >> 56);
  bytes[1] = (uint8_t)(v >> 48);
  bytes[2] = (uint8_t)(v >> 40);
  bytes[3] = (uint8_t)(v >> 32);
  bytes[4] = (uint8_t)(v >> 24);
  bytes[5] = (uint8_t)(v >> 16);
  bytes[6] = (uint8_t)(v >> 8);
  bytes[7] = (uint8_t)v;
}

static inline crypto_word CRYPTO_load_word_le(const void *in) {
crypto_word v;
OPENSSL_memcpy(&v, in, sizeof(v));
return v;
}

// CRYPTO_store_word_le copies the |sizeof(crypto_word)| bytes of |v| to |out|.
//
// NOTE(review): the bytes are copied unmodified, so the output is in
// little-endian order only when the host itself is little-endian (assuming
// |OPENSSL_memcpy| behaves like |memcpy|) - confirm that is the only supported
// configuration.
static inline void CRYPTO_store_word_le(void *out, crypto_word v) {
  const crypto_word *src = &v;
  OPENSSL_memcpy(out, src, sizeof(crypto_word));
}


// Runtime CPU feature support

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
Expand Down
40 changes: 12 additions & 28 deletions crypto/poly1305/poly1305_vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,6 @@

#include <emmintrin.h>

// load_u32_le copies the four bytes at |in| into a |uint32_t| and returns it.
// The bytes are copied unmodified, so the value is the little-endian reading
// of |in| on a little-endian host (assuming |OPENSSL_memcpy| behaves like
// |memcpy|).
static uint32_t load_u32_le(const uint8_t in[4]) {
  uint32_t word;
  OPENSSL_memcpy(&word, in, sizeof(word));
  return word;
}

// load_u64_le copies the eight bytes at |in| into a |uint64_t| and returns it.
// The bytes are copied unmodified, so the value is the little-endian reading
// of |in| on a little-endian host (assuming |OPENSSL_memcpy| behaves like
// |memcpy|).
static uint64_t load_u64_le(const uint8_t in[8]) {
  uint64_t word;
  OPENSSL_memcpy(&word, in, sizeof(word));
  return word;
}

// store_u64_le copies the eight bytes of |v| to |out|. The bytes are copied
// unmodified, so |out| receives the little-endian encoding of |v| on a
// little-endian host (assuming |OPENSSL_memcpy| behaves like |memcpy|).
static void store_u64_le(uint8_t out[8], uint64_t v) {
  const uint64_t *src = &v;
  OPENSSL_memcpy(out, src, sizeof(v));
}

typedef __m128i xmmi;

static const alignas(16) uint32_t poly1305_x64_sse2_message_mask[4] = {
Expand Down Expand Up @@ -117,8 +101,8 @@ void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) {
uint64_t t0, t1;

// clamp key
t0 = load_u64_le(key + 0);
t1 = load_u64_le(key + 8);
t0 = CRYPTO_load_u64_le(key + 0);
t1 = CRYPTO_load_u64_le(key + 8);
r0 = t0 & 0xffc0fffffff;
t0 >>= 44;
t0 |= t1 << 20;
Expand All @@ -136,10 +120,10 @@ void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) {
p->R22.d[3] = (uint32_t)(r2 >> 32);

// store pad
p->R23.d[1] = load_u32_le(key + 16);
p->R23.d[3] = load_u32_le(key + 20);
p->R24.d[1] = load_u32_le(key + 24);
p->R24.d[3] = load_u32_le(key + 28);
p->R23.d[1] = CRYPTO_load_u32_le(key + 16);
p->R23.d[3] = CRYPTO_load_u32_le(key + 20);
p->R24.d[1] = CRYPTO_load_u32_le(key + 24);
p->R24.d[3] = CRYPTO_load_u32_le(key + 28);

// H = 0
st->H[0] = _mm_setzero_si128();
Expand Down Expand Up @@ -771,8 +755,8 @@ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
}

poly1305_donna_atleast16bytes:
t0 = load_u64_le(m + 0);
t1 = load_u64_le(m + 8);
t0 = CRYPTO_load_u64_le(m + 0);
t1 = CRYPTO_load_u64_le(m + 8);
h0 += t0 & 0xfffffffffff;
t0 = shr128_pair(t1, t0, 44);
h1 += t0 & 0xfffffffffff;
Expand Down Expand Up @@ -811,8 +795,8 @@ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
OPENSSL_memset(m + leftover, 0, 16 - leftover);
leftover = 16;

t0 = load_u64_le(m + 0);
t1 = load_u64_le(m + 8);
t0 = CRYPTO_load_u64_le(m + 0);
t1 = CRYPTO_load_u64_le(m + 8);
h0 += t0 & 0xfffffffffff;
t0 = shr128_pair(t1, t0, 44);
h1 += t0 & 0xfffffffffff;
Expand Down Expand Up @@ -858,8 +842,8 @@ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
t1 = (t1 >> 24);
h2 += (t1)+c;

store_u64_le(mac + 0, ((h0) | (h1 << 44)));
store_u64_le(mac + 8, ((h1 >> 20) | (h2 << 24)));
CRYPTO_store_u64_le(mac + 0, ((h0) | (h1 << 44)));
CRYPTO_store_u64_le(mac + 8, ((h1 >> 20) | (h2 << 24)));
}

#endif // BORINGSSL_HAS_UINT128 && OPENSSL_X86_64

0 comments on commit ad59665

Please sign in to comment.