Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,9 @@ include/openssl/*.h
/crypto/rc4/
/crypto/ripemd/
/crypto/rsa/
/crypto/sha/
/crypto/sha/*
!/crypto/sha/sha256_aarch64_test.c
!/crypto/sha/sha512_aarch64_test.c
/crypto/sm3/
/crypto/sm4/
/crypto/stack/
Expand Down
20 changes: 12 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "SunOS")
endif()

add_definitions(-DLIBRESSL_INTERNAL)
add_definitions(-DOPENSSL_NO_HW_PADLOCK)
add_definitions(-D__BEGIN_HIDDEN_DECLS=)
add_definitions(-D__END_HIDDEN_DECLS=)

Expand Down Expand Up @@ -395,12 +394,14 @@ if (WIN32 AND (NOT CMAKE_GENERATOR_PLATFORM STREQUAL ""))
set(CMAKE_SYSTEM_PROCESSOR "${CMAKE_GENERATOR_PLATFORM}")
endif()

if(CMAKE_SYSTEM_NAME STREQUAL "SunOS" AND CMAKE_SYSTEM_PROCESSOR MATCHES "i386")
set(CMAKE_SYSTEM_PROCESSOR "x86_64")
endif()

if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64|arm64|ARM64)")
set(HOST_AARCH64 true)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
set(HOST_ARM true)
elseif(CMAKE_SYSTEM_NAME STREQUAL "SunOS" AND CMAKE_SYSTEM_PROCESSOR MATCHES "i386")
set(HOST_X86_64 true)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64|x64|amd64|AMD64)")
set(HOST_X86_64 true)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(i[3-6]86|[xX]86|Win32)")
Expand Down Expand Up @@ -428,15 +429,18 @@ if(ENABLE_ASM)
if(CMAKE_C_COMPILER_ABI STREQUAL "ELF")
if(HOST_X86_64)
set(HOST_ASM_ELF_X86_64 true)
elseif(CMAKE_SYSTEM_NAME STREQUAL "SunOS" AND HOST_I386)
set(HOST_ASM_ELF_X86_64 true)
endif()
add_definitions(-DHAVE_GNU_STACK)
elseif(APPLE AND HOST_X86_64)
set(HOST_ASM_MACOSX_X86_64 true)
elseif(MSVC AND (CMAKE_GENERATOR MATCHES "Win64" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64"))
set(HOST_ASM_MASM_X86_64 true)
ENABLE_LANGUAGE(ASM_MASM)
elseif(MSVC)
if(HOST_AARCH64)
set(HOST_ASM_MASM_ARM64 true)
ENABLE_LANGUAGE(ASM_MARMASM)
elseif(HOST_X86_64)
set(HOST_ASM_MASM_X86_64 true)
ENABLE_LANGUAGE(ASM_MASM)
endif()
elseif(MINGW AND HOST_X86_64)
set(HOST_ASM_MINGW64_X86_64 true)
endif()
Expand Down
26 changes: 16 additions & 10 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,14 @@ AC_ARG_ENABLE([tests],
[enable_tests="yes"])
AM_CONDITIONAL([ENABLE_TESTS], [test "x$enable_tests" = xyes])

# Conditionally enable assembly by default
AS_CASE([$host_cpu],
[arm64], [host_cpu=aarch64],
[*arm*], [host_cpu=arm],
[*amd64*], [host_cpu=x86_64 HOSTARCH=intel],
[i?86], [host_cpu=i386 HOSTARCH=intel enable_asm=no],
[mips64*], [host_cpu=mips64 enable_asm=no],
[mips*], [host_cpu=mips enable_asm=no],
[mips64*], [host_cpu=mips64, enable_asm=no],
[mips*], [host_cpu=mips, enable_asm=no],
[powerpc*], [host_cpu=powerpc],
[ppc64*], [host_cpu=powerpc64],
[x86_64], [HOSTARCH=intel]
Expand Down Expand Up @@ -112,21 +113,26 @@ int main() {return 0;}
])

AC_ARG_ENABLE([asm], AS_HELP_STRING([--disable-asm], [Disable assembly]))
AM_CONDITIONAL([OPENSSL_NO_ASM], [test "x$enable_asm" = "xno"])

# Conditionally enable assembly by default
AM_CONDITIONAL([HOST_ASM_DARWIN_AARCH64],
[test "$HOST_ABI" = "macho" -a "$host_cpu" = "aarch64" -a "x$enable_asm" != "xno"])
AM_CONDITIONAL([HOST_ASM_DARWIN_X86_64],
[test "$HOST_ABI" = "macho" -a "$host_cpu" = "x86_64" -a "x$enable_asm" != "xno"])
AM_CONDITIONAL([HOST_ASM_ELF_AARCH64],
[test "$HOST_ABI" = "elf" -a "$host_cpu" = "aarch64" -a "x$enable_asm" != "xno"])
AM_CONDITIONAL([HOST_ASM_ELF_MIPS],
[test "x$HOST_ABI" = "xelf" -a "$host_cpu" = "mips" -a "x$enable_asm" != "xno"])
[test "$HOST_ABI" = "elf" -a "$host_cpu" = "mips" -a "x$enable_asm" != "xno"])
AM_CONDITIONAL([HOST_ASM_ELF_MIPS64],
[test "x$HOST_ABI" = "xelf" -a "$host_cpu" = "mips64" -a "x$enable_asm" != "xno"])
[test "$HOST_ABI" = "elf" -a "$host_cpu" = "mips64" -a "x$enable_asm" != "xno"])
AM_CONDITIONAL([HOST_ASM_ELF_X86_64],
[test "x$HOST_ABI" = "xelf" -a "$host_cpu" = "x86_64" -a "x$enable_asm" != "xno"])
AM_CONDITIONAL([HOST_ASM_MACOSX_X86_64],
[test "x$HOST_ABI" = "xmacosx" -a "$host_cpu" = "x86_64" -a "x$enable_asm" != "xno"])
[test "$HOST_ABI" = "elf" -a "$host_cpu" = "x86_64" -a "x$enable_asm" != "xno"])
AM_CONDITIONAL([HOST_ASM_MASM_X86_64],
[test "x$HOST_ABI" = "xmasm" -a "$host_cpu" = "x86_64" -a "x$enable_asm" != "xno"])
[test "$HOST_ABI" = "masm" -a "$host_cpu" = "x86_64" -a "x$enable_asm" != "xno"])
AM_CONDITIONAL([HOST_ASM_MINGW64_X86_64],
[test "x$HOST_ABI" = "xmingw64" -a "$host_cpu" = "x86_64" -a "x$enable_asm" != "xno"])
[test "$HOST_ABI" = "mingw64" -a "$host_cpu" = "x86_64" -a "x$enable_asm" != "xno"])

AM_CONDITIONAL([OPENSSL_NO_ASM], [test "x$enable_asm" = "xno"])

AC_CONFIG_FILES([
Makefile
Expand Down
24 changes: 24 additions & 0 deletions crypto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,36 @@ endif()
if(HOST_AARCH64)
if(APPLE)
set(CRYPTO_SRC ${CRYPTO_SRC} arch/aarch64/crypto_cpu_caps_darwin.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_intrinsic.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_ce.S)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_test.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_intrinsic.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_ce.S)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_test.c)
elseif(LINUX)
set(CRYPTO_SRC ${CRYPTO_SRC} arch/aarch64/crypto_cpu_caps_linux.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_intrinsic.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_intrinsic.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64.c)
elseif(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD")
set(CRYPTO_SRC ${CRYPTO_SRC} arch/aarch64/crypto_cpu_caps.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_intrinsic.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_ce.S)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_test.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_intrinsic.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_ce.S)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_test.c)
elseif(WIN32)
set(CRYPTO_SRC ${CRYPTO_SRC} arch/aarch64/crypto_cpu_caps_windows.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_intrinsic.c)
#set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_ce.S)
#set_property(SOURCE sha/sha256_aarch64_ce.S PROPERTY LANGUAGE ASM_MARMASM)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha256_aarch64_test.c)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_intrinsic.c)
#set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_ce.S)
#set_property(SOURCE sha/sha512_aarch64_ce.S PROPERTY LANGUAGE ASM_MARMASM)
set(CRYPTO_SRC ${CRYPTO_SRC} sha/sha512_aarch64_test.c)
else()
set(CRYPTO_SRC ${CRYPTO_SRC} arch/aarch64/crypto_cpu_caps_none.c)
endif()
Expand Down
28 changes: 25 additions & 3 deletions crypto/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ libcrypto_la_LIBADD += libcompatnoopt.la
endif
libcrypto_la_CPPFLAGS = -I$(top_srcdir)/crypto/hidden ${AM_CPPFLAGS}
libcrypto_la_CPPFLAGS += -DLIBRESSL_INTERNAL
libcrypto_la_CPPFLAGS += -DOPENSSL_NO_HW_PADLOCK

if OPENSSLDIR_DEFINED
libcrypto_la_CPPFLAGS += -DOPENSSLDIR=\"@OPENSSLDIR@\"
Expand Down Expand Up @@ -200,17 +199,19 @@ include Makefile.am.arc4random
libcrypto_la_SOURCES =
EXTRA_libcrypto_la_SOURCES =

include Makefile.am.darwin-x86_64
include Makefile.am.elf-mips
include Makefile.am.elf-mips64
include Makefile.am.elf-x86_64
include Makefile.am.macosx-x86_64
include Makefile.am.masm-x86_64
include Makefile.am.mingw64-x86_64

if !HOST_ASM_ELF_AARCH64
if !HOST_ASM_ELF_MIPS
if !HOST_ASM_ELF_MIPS64
if !HOST_ASM_ELF_X86_64
if !HOST_ASM_MACOSX_X86_64
if !HOST_ASM_DARWIN_AARCH64
if !HOST_ASM_DARWIN_X86_64
if !HOST_ASM_MASM_X86_64
if !HOST_ASM_MINGW64_X86_64
libcrypto_la_CPPFLAGS += -DOPENSSL_NO_ASM
Expand All @@ -220,16 +221,36 @@ endif
endif
endif
endif
endif
endif

if HOST_AARCH64
if HOST_DARWIN
libcrypto_la_SOURCES += arch/aarch64/crypto_cpu_caps_darwin.c
libcrypto_la_SOURCES += sha/sha256_aarch64_intrinsic.c
libcrypto_la_SOURCES += sha/sha256_aarch64_ce.S
libcrypto_la_SOURCES += sha/sha256_aarch64_test.c
libcrypto_la_SOURCES += sha/sha512_aarch64_intrinsic.c
libcrypto_la_SOURCES += sha/sha512_aarch64_ce.S
libcrypto_la_SOURCES += sha/sha512_aarch64_test.c
else
if HOST_LINUX
libcrypto_la_SOURCES += arch/aarch64/crypto_cpu_caps_linux.c
libcrypto_la_SOURCES += sha/sha256_aarch64_intrinsic.c
libcrypto_la_SOURCES += sha/sha256_aarch64_ce.S
libcrypto_la_SOURCES += sha/sha256_aarch64.c
libcrypto_la_SOURCES += sha/sha512_aarch64_intrinsic.c
libcrypto_la_SOURCES += sha/sha512_aarch64_ce.S
libcrypto_la_SOURCES += sha/sha512_aarch64.c
else
if HOST_OPENBSD
libcrypto_la_SOURCES += arch/aarch64/crypto_cpu_caps.c
libcrypto_la_SOURCES += sha/sha256_aarch64_intrinsic.c
libcrypto_la_SOURCES += sha/sha256_aarch64_ce.S
libcrypto_la_SOURCES += sha/sha256_aarch64.c
libcrypto_la_SOURCES += sha/sha512_aarch64_intrinsic.c
libcrypto_la_SOURCES += sha/sha512_aarch64_ce.S
libcrypto_la_SOURCES += sha/sha512_aarch64.c
else
libcrypto_la_SOURCES += arch/aarch64/crypto_cpu_caps_none.c
endif
Expand All @@ -240,6 +261,7 @@ endif
if HOST_X86_64
libcrypto_la_SOURCES += arch/amd64/crypto_cpu_caps.c
endif

if HOST_I386
libcrypto_la_SOURCES += arch/i386/crypto_cpu_caps.c
endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ASM_X86_64_MACOSX += bn/arch/amd64/bn_arch.c

EXTRA_DIST += $(ASM_X86_64_MACOSX)

if HOST_ASM_MACOSX_X86_64
if HOST_ASM_DARWIN_X86_64
libcrypto_la_CPPFLAGS += -DAES_ASM
libcrypto_la_CPPFLAGS += -DBSAES_ASM
libcrypto_la_CPPFLAGS += -DVPAES_ASM
Expand Down
153 changes: 153 additions & 0 deletions crypto/sha/sha256_aarch64_intrinsic.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/* $OpenBSD: $ */
/*
* Copyright (c) 2023,2025 Joel Sing <[email protected]>
* Copyright (c) 2025 Brent Cook <[email protected]>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include <stdint.h>

#include <arm_neon.h>
#ifndef _WIN32
#include <arm_acle.h>
#endif

#include <openssl/sha.h>

/*
* SHA-256 implementation using the ARM Cryptographic Extension (CE).
*
* There are four instructions that enable hardware acceleration of SHA-256,
* however the documentation for these is woefully inadequate:
*
* sha256h: hash update - part 1 (without a number to be inconsistent)
* sha256h2: hash update - part 2
* sha256su0: message schedule update with sigma0 for four rounds
* sha256su1: message schedule update with sigma1 for four rounds
*/

/*
* SHA-256 constants - see FIPS 180-4 section 4.2.3.
*/
static const uint32_t k256[] =
{
0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
};

/*
* Update message schedule for m0 (W0:W1:W2:W3), using m1 (W4:W5:W6:W7),
* m2 (W8:W9:W10:11) and m3 (W12:W13:W14:W15). The sha256su0 instruction
* computes the sigma0 component of the message schedule update as:
* W0:W1:W2:W3 = sigma0(W1:W2:W3:W4) + W0:W1:W2:W3
* while sha256su1 computes the sigma1 component and adds in W9 as:
* W0:W1:W2:W3 = sigma1(W14:W15:W0:W1) + W9:W10:W12:W13 + W0:W1:W2:W3
*/
#define sha256_round(h0, h1, w, k) \
do { \
uint32x4_t tmp0 = vaddq_u32(w, k); \
uint32x4_t tmp1 = h0; \
h0 = vsha256hq_u32(h0, h1, tmp0); \
h1 = vsha256h2q_u32(h1, tmp1, tmp0); \
} while(0)

/*
* Compute four SHA-256 rounds by adding W0:W1:W2:W3 + K0:K1:K2:K3, then
* computing the remainder of each round (including the shuffle) via
* sha256h/sha256h2.
*/
#define sha256_round_update(h0, h1, m0, m1, m2, m3, k) \
m0 = vsha256su0q_u32(m0, m1); \
m0 = vsha256su1q_u32(m0, m2, m3); \
sha256_round(h0, h1, m0, k)

void
sha256_block_intrinsic(SHA256_CTX *ctx, const void *in, size_t num)
{
uint32_t *state = (uint32_t *)ctx->h;
const uint8_t *data = in;

/* Load state */
uint32x4_t hc0 = vld1q_u32(&state[0]);
uint32x4_t hc1 = vld1q_u32(&state[4]);
volatile uint32x4x4_t k;

while (num >= 1)
{
/* Copy current hash state. */
uint32x4_t hs0 = hc0;
uint32x4_t hs1 = hc1;

/* Load and byte swap message schedule */
uint32x4x4_t msg = vld1q_u32_x4((const uint32_t *)data);
msg.val[0] = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg.val[0])));
msg.val[1] = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg.val[1])));
msg.val[2] = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg.val[2])));
msg.val[3] = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg.val[3])));

/* Rounds 0 through 15 (four rounds at a time). */
k = vld1q_u32_x4(k256);
sha256_round(hs0, hs1, msg.val[0], k.val[0]);
sha256_round(hs0, hs1, msg.val[1], k.val[1]);
sha256_round(hs0, hs1, msg.val[2], k.val[2]);
sha256_round(hs0, hs1, msg.val[3], k.val[3]);

/* Rounds 16 through 31 (four rounds at a time). */
k = vld1q_u32_x4(k256 + 16);
sha256_round_update(hs0, hs1, msg.val[0], msg.val[1], msg.val[2], msg.val[3], k.val[0]);
sha256_round_update(hs0, hs1, msg.val[1], msg.val[2], msg.val[3], msg.val[0], k.val[1]);
sha256_round_update(hs0, hs1, msg.val[2], msg.val[3], msg.val[0], msg.val[1], k.val[2]);
sha256_round_update(hs0, hs1, msg.val[3], msg.val[0], msg.val[1], msg.val[2], k.val[3]);

/* Rounds 32 through 47 (four rounds at a time). */
k = vld1q_u32_x4(k256 + 32);
sha256_round_update(hs0, hs1, msg.val[0], msg.val[1], msg.val[2], msg.val[3], k.val[0]);
sha256_round_update(hs0, hs1, msg.val[1], msg.val[2], msg.val[3], msg.val[0], k.val[1]);
sha256_round_update(hs0, hs1, msg.val[2], msg.val[3], msg.val[0], msg.val[1], k.val[2]);
sha256_round_update(hs0, hs1, msg.val[3], msg.val[0], msg.val[1], msg.val[2], k.val[3]);

/* Rounds 48 through 63 (four rounds at a time). */
k = vld1q_u32_x4(k256 + 48);
sha256_round_update(hs0, hs1, msg.val[0], msg.val[1], msg.val[2], msg.val[3], k.val[0]);
sha256_round_update(hs0, hs1, msg.val[1], msg.val[2], msg.val[3], msg.val[0], k.val[1]);
sha256_round_update(hs0, hs1, msg.val[2], msg.val[3], msg.val[0], msg.val[1], k.val[2]);
sha256_round_update(hs0, hs1, msg.val[3], msg.val[0], msg.val[1], msg.val[2], k.val[3]);

/* Add intermediate state to hash state. */
hc0 = vaddq_u32(hs0, hc0);
hc1 = vaddq_u32(hs1, hc1);

data += 64;
num -= 1;
}

/* Save state */
vst1q_u32(&state[0], hc0);
vst1q_u32(&state[4], hc1);
}

Loading