Skip to content

Commit

Permalink
Merge pull request #513 from danielinux/benchmark
Browse files Browse the repository at this point in the history
Added benchmark script, enabled ARMASM for Cortex-M3,4,7,33
  • Loading branch information
dgarske authored Oct 25, 2024
2 parents 518909e + c1fbfd6 commit 787a21b
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 36 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/test-configs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,28 +70,28 @@ jobs:
with:
arch: arm
config-file: ./config/examples/imx-rt1040.config
make-args: PKA=1
make-args: PKA=1 NO_ARM_ASM=1

imx_rt1050_test_pka:
uses: ./.github/workflows/test-build-mcux-sdk.yml
with:
arch: arm
config-file: ./config/examples/imx-rt1050.config
make-args: PKA=1
make-args: PKA=1 NO_ARM_ASM=1

imx_rt1060_test_pka:
uses: ./.github/workflows/test-build-mcux-sdk.yml
with:
arch: arm
config-file: ./config/examples/imx-rt1060.config
make-args: PKA=1
make-args: PKA=1 NO_ARM_ASM=1

imx_rt1064_test_pka:
uses: ./.github/workflows/test-build-mcux-sdk.yml
with:
arch: arm
config-file: ./config/examples/imx-rt1064.config
make-args: PKA=1
make-args: PKA=1 NO_ARM_ASM=1

kinetis_k64f_test:
uses: ./.github/workflows/test-build-mcux-sdk.yml
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ clean:
$(Q)rm -f $(MACHINE_OBJ) $(MAIN_TARGET) $(LSCRIPT)
$(Q)rm -f $(OBJS)
$(Q)rm -f tools/keytools/otp/otp-keystore-gen
$(Q)rm -f .stack_usage
$(Q)$(MAKE) -C test-app -s clean
$(Q)$(MAKE) -C tools/check_config -s clean
$(Q)$(MAKE) -C stage1 -s clean
Expand Down Expand Up @@ -385,6 +386,13 @@ line-count-nrf52:
line-count-x86:
cloc --force-lang-def cloc_lang_def.txt src/boot_x86_fsp.c src/boot_x86_fsp_payload.c src/boot_x86_fsp_start.S src/image.c src/keystore.c src/libwolfboot.c src/loader.c src/string.c src/update_disk.c src/x86/ahci.c src/x86/ata.c src/x86/common.c src/x86/gpt.c src/x86/hob.c src/pci.c src/x86/tgl_fsp.c hal/x86_fsp_tgl.c hal/x86_uart.c

stack-usage: wolfboot.bin
$(Q)echo $(STACK_USAGE) > .stack_usage

image-header-size: wolfboot.bin
$(Q)echo $(IMAGE_HEADER_SIZE) > .image_header_size


cppcheck:
cppcheck -f --enable=warning --enable=portability \
--suppress="ctunullpointer" --suppress="nullPointer" \
Expand Down
55 changes: 44 additions & 11 deletions arch.mk
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,8 @@ ifeq ($(ARCH),ARM)
ifeq ($(CORTEX_A5),1)
FPU=-mfpu=vfp4-d16
CFLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -static -z noexecstack
LDLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -mtune=cortex-a5 -static -z noexecstack -Ttext 0x300000
LDLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -mtune=cortex-a5 -static \
-z noexecstack -Ttext 0x300000
# Cortex-A uses boot_arm32.o
OBJS+=src/boot_arm32.o src/boot_arm32_start.o
ifeq ($(NO_ASM),1)
Expand All @@ -198,11 +199,37 @@ ifeq ($(CORTEX_A5),1)
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.o
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o
CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \
-DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
endif
else
# All others use boot_arm.o
OBJS+=src/boot_arm.o
ifneq ($(NO_ARM_ASM),1)
CORTEXM_ARM_EXTRA_OBJS= \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-aes.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-chacha.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha512.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-aes-asm.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha3-asm.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-chacha-asm.o \
./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.o


CORTEXM_ARM_THUMB_EXTRA_OBJS= \
./lib/wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm.o \
./lib/wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.o

CORTEXM_ARM_EXTRA_CFLAGS+=-DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \
-DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
endif
ifeq ($(CORTEX_M33),1)
CFLAGS+=-mcpu=cortex-m33 -DCORTEX_M33
LDFLAGS+=-mcpu=cortex-m33
Expand All @@ -212,28 +239,25 @@ else
endif
CFLAGS+=-mcmse
ifeq ($(WOLFCRYPT_TZ),1)
CORTEXM_ARM_EXTRA_OBJS=
CORTEXM_ARM_EXTRA_CFLAGS=
SECURE_OBJS+=./src/wc_callable.o
SECURE_OBJS+=./lib/wolfssl/wolfcrypt/src/random.o
CFLAGS+=-DWOLFCRYPT_SECURE_MODE
SECURE_LDFLAGS+=-Wl,--cmse-implib -Wl,--out-implib=./src/wc_secure_calls.o
endif
endif # TZEN=1
ifeq ($(NO_ASM),1)
ifeq ($(SPMATH),1)
ifeq ($(NO_ASM),1)
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o
else
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=8
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
endif
endif
else
ifeq ($(SPMATH),1)
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
endif
endif
else
ifeq ($(CORTEX_M7),1)
CFLAGS+=-mcpu=cortex-m7
LDFLAGS+=-mcpu=cortex-m7
Expand All @@ -243,10 +267,12 @@ else
else
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
endif
endif
endif
else
ifeq ($(CORTEX_M0),1)
ifeq ($(CORTEX_M0),1)
CFLAGS+=-mcpu=cortex-m0
LDFLAGS+=-mcpu=cortex-m0
ifeq ($(SPMATH),1)
Expand All @@ -255,6 +281,9 @@ else
else
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_THUMB_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_armthumb.o
# TODO: integrate thumb2-asm
#CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=6
#OBJS+=$(CORTEXM_ARM_THUMB_EXTRA_OBJS)
endif
endif
else
Expand All @@ -269,6 +298,8 @@ else
ifeq ($(SPMATH),1)
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM -DWOLFSSL_SP_NO_UMAAL
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
endif
endif
else
Expand All @@ -284,6 +315,8 @@ else
ifeq ($(SPMATH),1)
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
endif
endif
endif
Expand Down
22 changes: 22 additions & 0 deletions docs/compile.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,28 @@ By default, wolfBoot is compiled for ARM Cortex-M3/4/7. To compile for Cortex-M0

`CORTEX_M0=1`

### Speed vs. size

Some targets support assembly optimizations by default.
To disable assembly optimizations, use `NO_ASM=1`. This option will
produce smaller code, but will also impact on the boot time.

ARM-specific ARM optimizations affecting hash and symmetric key ciphers can be
disabled with the option `NO_ARM_ASM=1`. This is useful for example when you want
to use SP math optimizations for key verification, but exclude SHA2/AES optimizations
to save some space.

#### Example: ECC256 + SHA256 on STM32H7

Benchmark footprint vs. boot time SHA of 100KB image + signature verification

| Description | Selected options | wolfBoot size (B) | Boot time (s) |
|-------------|------------------|-------------------|---------------|
| Full ECC256 assembly optimizations. Fastest. | `SIGN=ECC256` | 21836 | .583 |
| Optimize ECC only (SP math assembly only) | `SIGN=ECC256 NO_ARM_ASM=1` | 18624 | .760 |
| No assembly optimizations (smallest) | `SIGN=ECC256 NO_ASM=1` | 14416 | 3.356 |


### Flash partitions

The file [include/target.h](../include/target.h) is generated according to the configured flash geometry,
Expand Down
9 changes: 8 additions & 1 deletion include/user_settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ extern int tolower(int c);
# define ED25519_SMALL
# define NO_ED25519_SIGN
# define NO_ED25519_EXPORT
# define WOLFSSL_SHA512
# define USE_SLOW_SHA512
# define WOLFSSL_SHA512
#endif

/* ED448 and SHA3/SHAKE256 */
Expand Down Expand Up @@ -267,6 +267,9 @@ extern int tolower(int c);
!defined(WOLFCRYPT_SECURE_MODE)
# define NO_SHA256
# endif
#ifndef WOLFSSL_SHA512
#define WOLFSSL_SHA512
#endif
#endif

/* If SP math is enabled determine word size */
Expand Down Expand Up @@ -499,4 +502,8 @@ extern int tolower(int c);

#endif /* WOLFBOOT_PKCS11_APP */

#ifndef XTOLOWER
#define XTOLOWER(x) (x)
#endif

#endif /* !_WOLFBOOT_USER_SETTINGS_H_ */
8 changes: 4 additions & 4 deletions options.mk
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ ifeq ($(SIGN),XMSS)
ifeq ($(WOLFBOOT_SMALL_STACK),1)
$(error WOLFBOOT_SMALL_STACK with XMSS not supported)
else
STACK_USAGE=2720
STACK_USAGE=9352
endif
endif

Expand Down Expand Up @@ -684,9 +684,9 @@ ifeq ($(WOLFCRYPT_TZ_PKCS11),1)
WOLFCRYPT_OBJS+=./lib/wolfssl/wolfcrypt/src/hmac.o
WOLFCRYPT_OBJS+=./lib/wolfssl/wolfcrypt/src/dh.o
WOLFCRYPT_OBJS+=./lib/wolfPKCS11/src/crypto.o \
./lib/wolfPKCS11/src/internal.o \
./lib/wolfPKCS11/src/slot.o \
./lib/wolfPKCS11/src/wolfpkcs11.o
./lib/wolfPKCS11/src/internal.o \
./lib/wolfPKCS11/src/slot.o \
./lib/wolfPKCS11/src/wolfpkcs11.o
STACK_USAGE=16688
ifneq ($(ENCRYPT),1)
WOLFCRYPT_OBJS+=./lib/wolfssl/wolfcrypt/src/aes.o
Expand Down
4 changes: 4 additions & 0 deletions test-app/app_stm32h7.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,9 @@ void uart_print(const char *s)
}
}

#define FILLER_SIZE (100 * 1024)
static volatile uint8_t filler_data[FILLER_SIZE] = { 0x01, 0x02, 0x03 };

void main(void)
{
uint8_t firmware_version = 0;
Expand All @@ -373,6 +376,7 @@ void main(void)
if (FIRMWARE_A)
ld3_write(LED_INIT);

filler_data[FILLER_SIZE - 1] = 0xAA;
/* LED Indicator of successful UART initialization. SUCCESS = ON, FAIL = OFF */
if (uart_setup(115200) < 0)
ld2_write(LED_OFF);
Expand Down
4 changes: 3 additions & 1 deletion tools/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ ifeq ($(ARCH),)
CORTEX_M7?=0
CORTEX_M3?=0
NO_ASM?=0
NO_ARM_ASM?=0
EXT_FLASH?=0
SPI_FLASH?=0
QSPI_FLASH?=0
Expand Down Expand Up @@ -104,5 +105,6 @@ CONFIG_VARS:= ARCH TARGET SIGN HASH MCUXSDK MCUXPRESSO MCUXPRESSO_CPU MCUXPRESSO
NXP_CUSTOM_DCD NXP_CUSTOM_DCD_OBJS \
FLASH_OTP_KEYSTORE \
KEYVAULT_OBJ_SIZE \
KEYVAULT_MAX_ITEMS
KEYVAULT_MAX_ITEMS \
NO_ARM_ASM

98 changes: 98 additions & 0 deletions tools/scripts/benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/bin/bash
#
function run_on_board() {
# GPIO2: RST
# GPIO3: BOOT (input)

if ! (st-flash reset &>/dev/null); then
echo -n "No data."
else
sleep 1
st-flash --connect-under-reset write factory.bin 0x8000000 &>/dev/null
sleep .2
echo "2" > /sys/class/gpio/export 2>/dev/null
echo "out" > /sys/class/gpio/gpio2/direction
echo "1" > /sys/class/gpio/gpio2/value # Release reset
echo "0" > /sys/class/gpio/gpio2/value # Keep reset low
sleep 1
echo -n " | "
echo "1" > /sys/class/gpio/gpio2/value # Release reset
START=`date +%s.%N`
while (test `cat /sys/class/gpio/gpio4/value` -eq 0); do
sleep .01
done
while (test `cat /sys/class/gpio/gpio4/value` -eq 0); do
sleep .01
done
END=`date +%s.%N`
echo "scale=3; $END/1 - $START/1 "| bc
echo "in" > /sys/class/gpio/gpio2/direction
echo "2" >/sys/class/gpio/unexport 2>/dev/null
fi
}

function set_benchmark {
NAME=$1
shift
CONFIG=$@
# Name
echo -n "| "
echo -n $NAME
echo -n " | "
# Configuration
echo -n $CONFIG | tr -d '\n'
echo -n " | "
make clean &>/dev/null
make keysclean &>/dev/null
make $@ factory.bin &>/dev/null || make $@ factory.bin
make $@ stack-usage &>/dev/null
make $@ image-header-size &>/dev/null
# Bootloader size
echo -n `ls -l wolfboot.bin | cut -d " " -f 5 | tr -d '\n'`
echo -n " | "
# Stack size
cat .stack_usage | tr -d '\n'
echo -n " | "
# Image header size
cat .image_header_size | tr -d '\n'
# Boot time
run_on_board 2>&1 | tr -d '\n'
echo " |"
}

echo "4" > /sys/class/gpio/export 2>/dev/null
echo "2" > /sys/class/gpio/unexport 2>/dev/null
make keytools &>/dev/null
cp config/examples/stm32h7.config .config
echo "in" > /sys/class/gpio/gpio4/direction
# Output benchmark results in a Markdown table
echo "| Name | Configuration | Bootloader size | Stack size | Image header size | Boot time |"
echo "|------|---------------|-----------------|------------|-------------------|-----------|"


set_benchmark "SHA2 only" SIGN=NONE
set_benchmark "SHA384 only" SIGN=NONE HASH=SHA384
set_benchmark "SHA3 only" SIGN=NONE HASH=SHA3
set_benchmark "SHA2 only,small" SIGN=NONE NO_ASM=1
set_benchmark "rsa2048" SIGN=RSA2048
set_benchmark "rsa3072" SIGN=RSA3072
set_benchmark "rsa4096" SIGN=RSA4096
set_benchmark "rsa4096 with sha384" SIGN=RSA4096 HASH=SHA384
set_benchmark "ecdsa256" SIGN=ECC256
set_benchmark "ecdsa384" SIGN=ECC384
set_benchmark "ecdsa521" SIGN=ECC521
set_benchmark "ecdsa256 with small stack" SIGN=ECC384 WOLFBOOT_SMALL_STACK=1
set_benchmark "ecdsa256 with fast math" SIGN=ECC384 SP_MATH=0
set_benchmark "ecdsa256, no asm" SIGN=ECC256 NO_ASM=1
set_benchmark "ecdsa384, no asm" SIGN=ECC384 NO_ASM=1
set_benchmark "ecdsa521, no asm" SIGN=ECC521 NO_ASM=1
set_benchmark "ecdsa384 with sha384" SIGN=ECC384 HASH=SHA384
set_benchmark "ed25519 with sha384, small" SIGN=ED25519 HASH=SHA384 NO_ASM=1
set_benchmark "ed25519 fast" SIGN=ED25519 NO_ASM=0
set_benchmark "ed448" SIGN=ED448
set_benchmark "ML_DSA-44" SIGN=ML_DSA ML_DSA_LEVEL=2 IMAGE_SIGNATURE_SIZE=2420 IMAGE_HEADER_SIZE=8192
set_benchmark "ML_DSA-65" SIGN=ML_DSA ML_DSA_LEVEL=3 IMAGE_SIGNATURE_SIZE=3309 IMAGE_HEADER_SIZE=8192
set_benchmark "ML_DSA-87" SIGN=ML_DSA ML_DSA_LEVEL=5 IMAGE_SIGNATURE_SIZE=4627 IMAGE_HEADER_SIZE=12288
set_benchmark "LMS 1-10-8" SIGN=LMS LMS_LEVELS=1 LMS_HEIGHT=10 LMS_WINTERNITZ=8 IMAGE_HEADER_SIZE=4096 IMAGE_SIGNATURE_SIZE=1456
set_benchmark "XMSS-SHA2_10_256'" XMSS_PARAMS='XMSS-SHA2_10_256' SIGN=XMSS IMAGE_SIGNATURE_SIZE=2500 IMAGE_HEADER_SIZE=8192

Loading

0 comments on commit 787a21b

Please sign in to comment.