From bead05e31f43a4e56337fb2b718d34ffe6f7a407 Mon Sep 17 00:00:00 2001 From: Ahmed Ismail Date: Tue, 2 Apr 2024 14:45:57 +0100 Subject: [PATCH 1/4] speech-recognition: Fix issues with software inference engine It seems that the speech recognition DSP task, which collects the input data from the ROM/VSI audio source, needs to run a bit slower: when the ML inference is done in software, it is much slower than on the Ethos NPU, so the DSP task must be delayed for some time to give the software inference extra time to finish. Signed-off-by: Ahmed Ismail --- applications/speech_recognition/dsp/src/scheduler.cpp | 2 +- tools/scripts/build.sh | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/applications/speech_recognition/dsp/src/scheduler.cpp b/applications/speech_recognition/dsp/src/scheduler.cpp index 65d20711..e53394c7 100644 --- a/applications/speech_recognition/dsp/src/scheduler.cpp +++ b/applications/speech_recognition/dsp/src/scheduler.cpp @@ -200,7 +200,7 @@ uint32_t ulScheduler( // Add delay to allow some time for the connectivity task // to send and receive messages to and from the cloud. - vTaskDelay(100); + vTaskDelay(120); } *error=sdfError; return(nbSchedule); diff --git a/tools/scripts/build.sh b/tools/scripts/build.sh index eed22def..83c3ebcf 100755 --- a/tools/scripts/build.sh +++ b/tools/scripts/build.sh @@ -276,11 +276,6 @@ if [ "$EXAMPLE" != "blinky" ] && [ ! -f "$PRIVATE_KEY_PATH" ]; then exit 2 fi -if [ "$EXAMPLE" == "speech-recognition" ] && [ "$ML_INFERENCE_ENGINE" == "SOFTWARE" ]; then - echo "Error: Invalid combination of example and ML Inference engine. speech-recognition only support ETHOS ML Inference" >&2 - exit 1 -fi - if [ "$EXAMPLE" == "object-detection" ] && [ "$TARGET" != "corstone315" ]; then echo "Error: Invalid combination of example and target. 
object-detection only supports corstone315" >&2 exit 2 From 4092b70213752563671213fa61c0b248beaa066a Mon Sep 17 00:00:00 2001 From: Ahmed Ismail Date: Fri, 15 Mar 2024 17:30:08 +0000 Subject: [PATCH 2/4] ml-model: Separate model image from NS image The idea is to move the ML model to the DDR memory region, generate a separate image for the DDR memory region where the model can be over the air updated without the need to update the NS image. To do so the following changes are needed: * Modify the linker scripts for all platforms to remove the RAM load address of the ML model image as it should be loaded to DDR memory region. * Some linker sections were renamed to be more descriptive. * A new CMake function is introduced to extract sections from input AXF file, this function is used to extract DDR section which contains the ML model out of the NS image to be loaded separately. Signed-off-by: Ahmed Ismail --- applications/keyword_detection/CMakeLists.txt | 2 + applications/object_detection/CMakeLists.txt | 2 + .../speech_recognition/CMakeLists.txt | 2 + bsp/CMakeLists.txt | 3 + bsp/corstone300/an552_ns.ld | 25 +------ bsp/corstone300/an552_ns.sct | 71 ++++++++++++------ bsp/corstone310/an555_ns.ld | 25 +------ bsp/corstone310/an555_ns.sct | 36 +++++---- bsp/corstone315/corstone_315_ns.ld | 23 +----- bsp/corstone315/corstone_315_ns.sct | 34 +++++---- .../integration/cmake/MergeTfmImages.cmake | 7 ++ .../integration/cmake/SignTfmImage.cmake | 15 +++- release_changes/202404021128.change | 1 + tools/cmake/ConvertElfToBin.cmake | 75 ++++++++++++++++++- 14 files changed, 201 insertions(+), 120 deletions(-) create mode 100644 release_changes/202404021128.change diff --git a/applications/keyword_detection/CMakeLists.txt b/applications/keyword_detection/CMakeLists.txt index 264176f7..24a0696c 100644 --- a/applications/keyword_detection/CMakeLists.txt +++ b/applications/keyword_detection/CMakeLists.txt @@ -183,6 +183,8 @@ iot_reference_arm_corstone3xx_tf_m_merge_images( 
keyword-detection ${NS_PROVISIONING_BUNDLE_LOAD_ADDRESS} ${CMAKE_BINARY_DIR}/helpers/provisioning/provisioning_data.bin + ${NS_DDR4_IMAGE_LOAD_ADDRESS} + ${SECTORS_BIN_DIR}/ddr.bin ) iot_reference_arm_corstone3xx_generate_aws_update_digest_and_signature( diff --git a/applications/object_detection/CMakeLists.txt b/applications/object_detection/CMakeLists.txt index 9e64e9d9..4148fdb9 100644 --- a/applications/object_detection/CMakeLists.txt +++ b/applications/object_detection/CMakeLists.txt @@ -167,6 +167,8 @@ iot_reference_arm_corstone3xx_tf_m_merge_images( object-detection ${NS_PROVISIONING_BUNDLE_LOAD_ADDRESS} ${CMAKE_BINARY_DIR}/helpers/provisioning/provisioning_data.bin + ${NS_DDR4_IMAGE_LOAD_ADDRESS} + ${SECTORS_BIN_DIR}/ddr.bin ) iot_reference_arm_corstone3xx_generate_aws_update_digest_and_signature( diff --git a/applications/speech_recognition/CMakeLists.txt b/applications/speech_recognition/CMakeLists.txt index 680eeb9a..8e2b7be3 100644 --- a/applications/speech_recognition/CMakeLists.txt +++ b/applications/speech_recognition/CMakeLists.txt @@ -194,6 +194,8 @@ iot_reference_arm_corstone3xx_tf_m_merge_images( speech-recognition ${NS_PROVISIONING_BUNDLE_LOAD_ADDRESS} ${CMAKE_BINARY_DIR}/helpers/provisioning/provisioning_data.bin + ${NS_DDR4_IMAGE_LOAD_ADDRESS} + ${SECTORS_BIN_DIR}/ddr.bin ) iot_reference_arm_corstone3xx_generate_aws_update_digest_and_signature( diff --git a/bsp/CMakeLists.txt b/bsp/CMakeLists.txt index fb10a97b..d0873fc6 100644 --- a/bsp/CMakeLists.txt +++ b/bsp/CMakeLists.txt @@ -48,6 +48,7 @@ if(ARM_CORSTONE_BSP_TARGET_PLATFORM STREQUAL "corstone300") set(NS_IMAGE_LOAD_ADDRESS 0x28040000 CACHE STRING "Non-secure user application loading address") set(S_PROVISIONING_BUNDLE_LOAD_ADDRESS 0x10022000 CACHE STRING "Secure provisioning bundle loading address") set(NS_PROVISIONING_BUNDLE_LOAD_ADDRESS 0x211FF000 CACHE STRING "Non-Secure provisioning bundle loading address") + set(NS_DDR4_IMAGE_LOAD_ADDRESS 0x60000000 CACHE STRING "Non-Secure Double 
Data Rate RAM image loading address") set(ETHOS_U_BASE_ADDR "0x48102000" CACHE STRING "Ethos-U NPU base address" FORCE) set(ETHOS_U_IRQN "56" CACHE STRING "Ethos-U NPU Interrupt" FORCE) @@ -75,6 +76,7 @@ elseif(ARM_CORSTONE_BSP_TARGET_PLATFORM STREQUAL "corstone310") set(NS_IMAGE_LOAD_ADDRESS 0x28040000 CACHE STRING "Non-secure user application loading address") set(S_PROVISIONING_BUNDLE_LOAD_ADDRESS 0x11022000 CACHE STRING "Secure provisioning bundle loading address") set(NS_PROVISIONING_BUNDLE_LOAD_ADDRESS 0x213FF000 CACHE STRING "Non-Secure provisioning bundle loading address") + set(NS_DDR4_IMAGE_LOAD_ADDRESS 0x60000000 CACHE STRING "Non-Secure Double Data Rate RAM image loading address") set(ETHOS_U_BASE_ADDR "0x40004000" CACHE STRING "Ethos-U NPU base address" FORCE) set(ETHOS_U_IRQN "16" CACHE STRING "Ethos-U NPU Interrupt" FORCE) @@ -104,6 +106,7 @@ elseif(ARM_CORSTONE_BSP_TARGET_PLATFORM STREQUAL "corstone315") set(S_CM_PROVISIONING_BUNDLE_LOAD_ADDRESS 0x12024000 CACHE STRING "Secure CM provisioning bundle loading address") set(S_DM_PROVISIONING_BUNDLE_LOAD_ADDRESS 0x1202aa00 CACHE STRING "Secure DM provisioning bundle loading address") set(NS_PROVISIONING_BUNDLE_LOAD_ADDRESS 0x213FF000 CACHE STRING "Non-Secure provisioning bundle loading address") + set(NS_DDR4_IMAGE_LOAD_ADDRESS 0x60000000 CACHE STRING "Non-Secure Double Data Rate RAM image loading address") set(ETHOS_U_BASE_ADDR "0x40004000" CACHE STRING "Ethos-U NPU base address" FORCE) set(ETHOS_U_IRQN "16" CACHE STRING "Ethos-U NPU Interrupt" FORCE) diff --git a/bsp/corstone300/an552_ns.ld b/bsp/corstone300/an552_ns.ld index 528e773b..0b54dbd7 100644 --- a/bsp/corstone300/an552_ns.ld +++ b/bsp/corstone300/an552_ns.ld @@ -1,4 +1,4 @@ -/* Copyright 2009-2023 Arm Limited and/or its affiliates +/* Copyright 2009-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 */ @@ -104,9 +104,6 @@ SECTIONS LONG (LOADADDR(.data)) LONG (ADDR(.data)) LONG (SIZEOF(.data) / 4) - LONG 
(LOADADDR(.model)) - LONG (ADDR(.model)) - LONG (SIZEOF(.model) / 4) __copy_table_end__ = .; /* .zero.table */ @@ -192,7 +189,7 @@ SECTIONS } > RAM - .model : AT ( LOADADDR(.data) + SIZEOF(.data) ) + ddr.bin : { __ddr_start__ = .; . = ALIGN(16); @@ -204,30 +201,16 @@ SECTIONS . = ALIGN (16); /* labels */ *(labels) - __ddr_end__ = .; - } > DDR - - __ddr_load_address__ = LOADADDR(.model); - - .activationbuf : - { . = ALIGN (16); *(.bss.NoInit.activation_buf_sram) *(.bss.NoInit.activation_buf_dram) - } > DDR AT > DDR - - .vsibuffer : - { . = ALIGN (4); /* Buffer for transferring VSI audio data from S to NS */ *(.bss.NoInit.vsi_audio_buffer) - } > DDR AT > DDR - - .ethosucache : - { . = ALIGN (16); *(.bss.NoInit.ethos_u_cache) - } > DDR AT > DDR + __ddr_end__ = .; + } > DDR .bss : { diff --git a/bsp/corstone300/an552_ns.sct b/bsp/corstone300/an552_ns.sct index 952cd704..b82fcbd7 100644 --- a/bsp/corstone300/an552_ns.sct +++ b/bsp/corstone300/an552_ns.sct @@ -1,32 +1,64 @@ -/* Copyright 2023-2024, Arm Limited and/or its affiliates - * - * SPDX-License-Identifier: Apache-2.0 +#! armclang --target=arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+fp.dp -E -x c +/* + * Copyright (c) 2023-2024, Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -LR_CODE (0x28000000 + ((((0) + (0x40000)) + (0x400)))) { - ER_CODE (0x28000000 + ((((0) + (0x40000)) + (0x400)))) (((0x340000) - (0x400) - (0xC00))) { +#define FLASH_NS_PARTITION_SIZE 0x340000 +#define FLASH_S_PARTITION_SIZE 0x40000 +#define ROM_START 0x28000000 +#define BL2_HEADER_SIZE 0x400 +#define BL2_TRAILER_SIZE 0xC00 +#define ISRAM0_BASE_ADDRESS 0x21000000 +#define ISRAM0_S_SIZE 0x00020000 +#define ISRAM0_SIZE 0x00100000 +#define ISRAM1_SIZE 0x00100000 +#define PROVISIONING_SIZE 0x1000 +#define STACK_SIZE 0x00001000 +#define HEAP_SIZE 0x000C0000 + +LOAD_REGION_0 (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + (BL2_HEADER_SIZE)))) +{ + flash.bin (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + (BL2_HEADER_SIZE)))) (((FLASH_NS_PARTITION_SIZE) - (BL2_HEADER_SIZE) - (BL2_TRAILER_SIZE))) { *.o (RESET +First) * (InRoot$$Sections) * (+RO) } - /* Where 0x20000 is NS_RAM_SIZE and 0x1000 is the provisioning size */ - ER_DATA (0x21000000 + 0x20000) (0x00200000 - 0x20000 - 0x1000) { + ; The last 0x1000 bytes are reserved for the provisioning bundle + data.bin (ISRAM0_BASE_ADDRESS + ISRAM0_S_SIZE) ((ISRAM0_SIZE -ISRAM0_S_SIZE) + ISRAM1_SIZE - PROVISIONING_SIZE) { * (+ZI +RW) } - ARM_LIB_STACK +0 ALIGN 32 EMPTY (0x0001000) { + ARM_LIB_STACK +0 ALIGN 32 EMPTY (STACK_SIZE) { } - ARM_LIB_HEAP +0 ALIGN 8 EMPTY (0x00C0000) { + ARM_LIB_HEAP +0 ALIGN 8 EMPTY (HEAP_SIZE) { } +} +;--------------------------------------------------------- +; Second load region (DDR) +;--------------------------------------------------------- +LOAD_REGION_1 0x60000000 0x10000000 +{ ;----------------------------------------------------- - ; SSE-300's internal SRAM of 4MiB - reserved for - ; activation buffers. - ; This region should have 3 cycle read latency from - ; both Cortex-M55 and Ethos-U55 + ; 32 MiB of DDR space for neural network model, + ; input vectors and labels. 
If the activation buffer + ; size required by the network is bigger than the + ; SRAM size available, it is accommodated here. ;----------------------------------------------------- - ddr.bin 0x60000000 NOCOMPRESS ALIGN 16 0x10000000 + ddr.bin 0x60000000 NOCOMPRESS ALIGN 16 0x10000000 { ; nn model's baked in input matrices *.o (ifm) @@ -37,19 +69,16 @@ LR_CODE (0x28000000 + ((((0) + (0x40000)) + (0x400)))) { ; labels *.o (labels) - ; if the activation buffer (tensor arena) doesn't - ; fit in the SRAM region, we accommodate it here - *.o (activation_buf) - - ; activation buffers a.k.a tensor arena when memory mode dedicated sram - *.o (.bss.NoInit.activation_buf_dram) - ; activation buffers a.k.a tensor arena when ; memory mode sram only or shared sram *.o (.bss.NoInit.activation_buf_sram) + ; activation buffers a.k.a tensor arena when memory mode dedicated sram + *.o (.bss.NoInit.activation_buf_dram) + ; Buffer for transferring VSI audio data from S to NS *.o (.bss.NoInit.vsi_audio_buffer) + ; Cache area (if used) *.o (.bss.NoInit.ethos_u_cache) } diff --git a/bsp/corstone310/an555_ns.ld b/bsp/corstone310/an555_ns.ld index 25d6608f..37114312 100644 --- a/bsp/corstone310/an555_ns.ld +++ b/bsp/corstone310/an555_ns.ld @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2022 Arm Limited. All rights reserved. + * Copyright (c) 2009-2024 Arm Limited. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -118,9 +118,6 @@ SECTIONS LONG (LOADADDR(.data)) LONG (ADDR(.data)) LONG (SIZEOF(.data) / 4) - LONG (LOADADDR(.model)) - LONG (ADDR(.model)) - LONG (SIZEOF(.model) / 4) __copy_table_end__ = .; /* .zero.table */ @@ -206,7 +203,7 @@ SECTIONS } > RAM - .model : AT ( LOADADDR(.data) + SIZEOF(.data) ) + ddr.bin : { __ddr_start__ = .; . = ALIGN(16); @@ -218,30 +215,16 @@ SECTIONS . = ALIGN (16); /* labels */ *(labels) - __ddr_end__ = .; - } > DDR - - __ddr_load_address__ = LOADADDR(.model); - - .activationbuf : - { . 
= ALIGN (16); *(.bss.NoInit.activation_buf_sram) *(.bss.NoInit.activation_buf_dram) - } > DDR AT > DDR - - .vsibuffer : - { . = ALIGN (4); /* Buffer for transferring VSI audio data from S to NS */ *(.bss.NoInit.vsi_audio_buffer) - } > DDR AT > DDR - - .ethosucache : - { . = ALIGN (16); *(.bss.NoInit.ethos_u_cache) - } > DDR AT > DDR + __ddr_end__ = .; + } > DDR .bss : { diff --git a/bsp/corstone310/an555_ns.sct b/bsp/corstone310/an555_ns.sct index 40b810bf..536f90b2 100644 --- a/bsp/corstone310/an555_ns.sct +++ b/bsp/corstone310/an555_ns.sct @@ -1,6 +1,6 @@ #! armclang --target=arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+fp.dp -E -x c /* - * Copyright (c) 2021-2023 Arm Limited. All rights reserved. + * Copyright (c) 2021-2024 Arm Limited. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,14 +27,15 @@ #define STACK_SIZE 0x00001000 #define HEAP_SIZE 0x000C0000 -LR_CODE (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) { - ER_CODE (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) (((FLASH_NS_PARTITION_SIZE) - BL2_HEADER_SIZE - BL2_TRAILER_SIZE)) { +LOAD_REGION_0 (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) +{ + flash.bin (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) (((FLASH_NS_PARTITION_SIZE) - BL2_HEADER_SIZE - BL2_TRAILER_SIZE)) { *.o (RESET +First) * (+RO) } ; The last 0x1000 bytes are reserved for the provisioning bundle - ER_DATA (ISRAM0_BASE_ADDRESS + ((ISRAM1_OFFSET))) (ISRAM1_SIZE - PROVISIONING_SIZE) { + data.bin (ISRAM0_BASE_ADDRESS + ((ISRAM1_OFFSET))) (ISRAM1_SIZE - PROVISIONING_SIZE) { * (+ZI +RW) } @@ -44,14 +45,20 @@ LR_CODE (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) { ARM_LIB_HEAP +0 ALIGN 8 EMPTY (HEAP_SIZE) { } +} +;--------------------------------------------------------- +; Second load region (DDR) 
+;--------------------------------------------------------- +LOAD_REGION_1 0x60000000 0x10000000 +{ ;----------------------------------------------------- - ; SSE-300's internal SRAM of 4MiB - reserved for - ; activation buffers. - ; This region should have 3 cycle read latency from - ; both Cortex-M55 and Ethos-U55 + ; 32 MiB of DDR space for neural network model, + ; input vectors and labels. If the activation buffer + ; size required by the network is bigger than the + ; SRAM size available, it is accommodated here. ;----------------------------------------------------- - ddr.bin 0x60000000 NOCOMPRESS ALIGN 16 0x10000000 + ddr.bin 0x60000000 NOCOMPRESS ALIGN 16 0x10000000 { ; nn model's baked in input matrices *.o (ifm) @@ -62,19 +69,16 @@ LR_CODE (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) { ; labels *.o (labels) - ; if the activation buffer (tensor arena) doesn't - ; fit in the SRAM region, we accommodate it here - *.o (activation_buf) - - ; activation buffers a.k.a tensor arena when memory mode dedicated sram - *.o (.bss.NoInit.activation_buf_dram) - ; activation buffers a.k.a tensor arena when ; memory mode sram only or shared sram *.o (.bss.NoInit.activation_buf_sram) + ; activation buffers a.k.a tensor arena when memory mode dedicated sram + *.o (.bss.NoInit.activation_buf_dram) + ; Buffer for transferring VSI audio data from S to NS *.o (.bss.NoInit.vsi_audio_buffer) + ; Cache area (if used) *.o (.bss.NoInit.ethos_u_cache) } diff --git a/bsp/corstone315/corstone_315_ns.ld b/bsp/corstone315/corstone_315_ns.ld index 4ac67d84..2eb0c1b1 100644 --- a/bsp/corstone315/corstone_315_ns.ld +++ b/bsp/corstone315/corstone_315_ns.ld @@ -118,9 +118,6 @@ SECTIONS LONG (LOADADDR(.data)) LONG (ADDR(.data)) LONG (SIZEOF(.data) / 4) - LONG (LOADADDR(.model)) - LONG (ADDR(.model)) - LONG (SIZEOF(.model) / 4) __copy_table_end__ = .; /* .zero.table */ @@ -206,7 +203,7 @@ SECTIONS } > RAM - .model : AT ( LOADADDR(.data) + SIZEOF(.data) ) + ddr.bin : { 
__ddr_start__ = .; . = ALIGN(16); @@ -218,30 +215,16 @@ SECTIONS . = ALIGN (16); /* labels */ *(labels) - __ddr_end__ = .; - } > DDR - - __ddr_load_address__ = LOADADDR(.model); - - .activationbuf : - { . = ALIGN (16); *(.bss.NoInit.activation_buf_sram) *(.bss.NoInit.activation_buf_dram) - } > DDR AT > DDR - - .vsibuffer : - { . = ALIGN (4); /* Buffer for transferring VSI audio data from S to NS */ *(.bss.NoInit.vsi_audio_buffer) - } > DDR AT > DDR - - .ethosucache : - { . = ALIGN (16); *(.bss.NoInit.ethos_u_cache) - } > DDR AT > DDR + __ddr_end__ = .; + } > DDR .bss : { diff --git a/bsp/corstone315/corstone_315_ns.sct b/bsp/corstone315/corstone_315_ns.sct index 0eb079b8..ad99a40c 100644 --- a/bsp/corstone315/corstone_315_ns.sct +++ b/bsp/corstone315/corstone_315_ns.sct @@ -27,14 +27,15 @@ #define STACK_SIZE 0x00001000 #define HEAP_SIZE 0x000C0000 -LR_CODE (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) { - ER_CODE (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) (((FLASH_NS_PARTITION_SIZE) - BL2_HEADER_SIZE - BL2_TRAILER_SIZE)) { +LOAD_REGION_0 (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) +{ + flash.bin (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) (((FLASH_NS_PARTITION_SIZE) - BL2_HEADER_SIZE - BL2_TRAILER_SIZE)) { *.o (RESET +First) * (+RO) } ; The last 0x1000 bytes are reserved for the provisioning bundle - ER_DATA (ISRAM0_BASE_ADDRESS + ((ISRAM1_OFFSET))) (ISRAM1_SIZE - PROVISIONING_SIZE) { + data.bin (ISRAM0_BASE_ADDRESS + ((ISRAM1_OFFSET))) (ISRAM1_SIZE - PROVISIONING_SIZE) { * (+ZI +RW) } @@ -44,14 +45,20 @@ LR_CODE (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) { ARM_LIB_HEAP +0 ALIGN 8 EMPTY (HEAP_SIZE) { } +} +;--------------------------------------------------------- +; Second load region (DDR) +;--------------------------------------------------------- +LOAD_REGION_1 0x60000000 0x10000000 +{ 
;----------------------------------------------------- - ; SSE-300's internal SRAM of 4MiB - reserved for - ; activation buffers. - ; This region should have 3 cycle read latency from - ; both Cortex-M55 and Ethos-U55 + ; 32 MiB of DDR space for neural network model, + ; input vectors and labels. If the activation buffer + ; size required by the network is bigger than the + ; SRAM size available, it is accommodated here. ;----------------------------------------------------- - ddr.bin 0x60000000 NOCOMPRESS ALIGN 16 0x10000000 + ddr.bin 0x60000000 NOCOMPRESS ALIGN 16 0x10000000 { ; nn model's baked in input matrices *.o (ifm) @@ -62,19 +69,16 @@ LR_CODE (ROM_START + ((((0) + (FLASH_S_PARTITION_SIZE)) + BL2_HEADER_SIZE))) { ; labels *.o (labels) - ; if the activation buffer (tensor arena) doesn't - ; fit in the SRAM region, we accommodate it here - *.o (activation_buf) - - ; activation buffers a.k.a tensor arena when memory mode dedicated sram - *.o (.bss.NoInit.activation_buf_dram) - ; activation buffers a.k.a tensor arena when ; memory mode sram only or shared sram *.o (.bss.NoInit.activation_buf_sram) + ; activation buffers a.k.a tensor arena when memory mode dedicated sram + *.o (.bss.NoInit.activation_buf_dram) + ; Buffer for transferring VSI audio data from S to NS *.o (.bss.NoInit.vsi_audio_buffer) + ; Cache area (if used) *.o (.bss.NoInit.ethos_u_cache) } diff --git a/components/security/trusted_firmware-m/integration/cmake/MergeTfmImages.cmake b/components/security/trusted_firmware-m/integration/cmake/MergeTfmImages.cmake index 15817e93..4c00a176 100644 --- a/components/security/trusted_firmware-m/integration/cmake/MergeTfmImages.cmake +++ b/components/security/trusted_firmware-m/integration/cmake/MergeTfmImages.cmake @@ -24,6 +24,11 @@ function(iot_reference_arm_corstone3xx_tf_m_merge_images target) else() set(ns_provisioning_data_param "") endif() + if(DEFINED ARGV3 AND DEFINED ARGV4) + set(ddr_binary_param ${ARGV4} -Binary -offset ${ARGV3}) + else() + 
set(ddr_binary_param "") + endif() find_program(srec_cat NAMES srec_cat REQUIRED) find_program(objcopy NAMES arm-none-eabi-objcopy objcopy REQUIRED) if(ARM_CORSTONE_BSP_TARGET_PLATFORM STREQUAL "corstone300" OR ARM_CORSTONE_BSP_TARGET_PLATFORM STREQUAL "corstone310") @@ -37,6 +42,7 @@ function(iot_reference_arm_corstone3xx_tf_m_merge_images target) ${srec_cat} ${BINARY_DIR}/api_ns/bin/bl2.bin -Binary -offset ${BL2_IMAGE_LOAD_ADDRESS} ${BINARY_DIR}/api_ns/bin/tfm_s_signed.bin -Binary -offset ${S_IMAGE_LOAD_ADDRESS} $/${target}_signed.bin -Binary -offset ${NS_IMAGE_LOAD_ADDRESS} + ${ddr_binary_param} ${ns_provisioning_data_param} ${BINARY_DIR}/api_ns/bin/encrypted_provisioning_bundle.bin -Binary -offset ${S_PROVISIONING_BUNDLE_LOAD_ADDRESS} -o $/${target}_merged.hex @@ -62,6 +68,7 @@ function(iot_reference_arm_corstone3xx_tf_m_merge_images target) ${BINARY_DIR}/api_ns/bin/bl2_signed.bin -Binary -offset ${BL2_IMAGE_LOAD_ADDRESS} ${BINARY_DIR}/api_ns/bin/tfm_s_signed.bin -Binary -offset ${S_IMAGE_LOAD_ADDRESS} $/${target}_signed.bin -Binary -offset ${NS_IMAGE_LOAD_ADDRESS} + ${ddr_binary_param} ${ns_provisioning_data_param} -o $/${target}_merged.hex COMMAND diff --git a/components/security/trusted_firmware-m/integration/cmake/SignTfmImage.cmake b/components/security/trusted_firmware-m/integration/cmake/SignTfmImage.cmake index 6e099e35..d1df1e48 100644 --- a/components/security/trusted_firmware-m/integration/cmake/SignTfmImage.cmake +++ b/components/security/trusted_firmware-m/integration/cmake/SignTfmImage.cmake @@ -1,4 +1,4 @@ -# Copyright 2023 Arm Limited and/or its affiliates +# Copyright 2023-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: MIT @@ -14,7 +14,16 @@ function(iot_reference_arm_corstone3xx_tf_m_sign_image target signed_target_name else() set(pad_option "") endif() - target_elf_to_bin(${target} ${target}_unsigned) + + set(LINKER_SECTION_NAMES "ddr.bin") + set(OUTPUT_BINARY_NAME "flash") + + extract_sections_from_axf( + ${target} + 
SECTIONS_NAMES "${LINKER_SECTION_NAMES}" + OUTPUT_BIN_NAME "${OUTPUT_BINARY_NAME}" + ) + add_custom_command( TARGET ${target} @@ -31,7 +40,7 @@ function(iot_reference_arm_corstone3xx_tf_m_sign_image target signed_target_name --align 1 --pad-header ${pad_option} -H 0x400 -s auto --measured-boot-record --confirm - $/${target}_unsigned.bin + ${SECTORS_BIN_DIR}/${OUTPUT_BINARY_NAME}.bin $/${signed_target_name}.bin COMMAND ${CMAKE_COMMAND} -E echo "-- signed: $/${signed_target_name}.bin" diff --git a/release_changes/202404021128.change b/release_changes/202404021128.change new file mode 100644 index 00000000..15fae51f --- /dev/null +++ b/release_changes/202404021128.change @@ -0,0 +1 @@ +ml-model: Separate model image from NS image. diff --git a/tools/cmake/ConvertElfToBin.cmake b/tools/cmake/ConvertElfToBin.cmake index 686cdfe9..a5a44812 100644 --- a/tools/cmake/ConvertElfToBin.cmake +++ b/tools/cmake/ConvertElfToBin.cmake @@ -1,12 +1,23 @@ -# Copyright 2021-2023 Arm Limited and/or its affiliates +# Copyright 2021-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: MIT function(target_elf_to_bin target output_binary_name) if(CMAKE_C_COMPILER_ID STREQUAL "GNU") - set(elf_to_bin arm-none-eabi-objcopy -O binary $ $/${output_binary_name}.bin) + find_program(objcopy NAMES arm-none-eabi-objcopy objcopy REQUIRED) + set(elf_to_bin ${objcopy} + -O binary + $ + $/${output_binary_name}.bin + ) elseif(CMAKE_C_COMPILER_ID STREQUAL "ARMClang") - set(elf_to_bin fromelf --bin --output $/${output_binary_name}.bin $ --bincombined) + find_program(fromelf NAMES fromelf REQUIRED) + set(elf_to_bin ${fromelf} + --bin + --output $/${output_binary_name}.bin + $ + --bincombined + ) endif() add_custom_command( @@ -22,3 +33,61 @@ function(target_elf_to_bin target output_binary_name) VERBATIM ) endfunction() + +# This function is used to extract sections in binaries from input AXF file. 
+# It is making use of CMake optional arguments feature, the reason why this feature +# is used is that in case of using Arm GNU toolchain, it is up for each application to decide +# which sections are to be extracted using `SECTIONS_NAMES` variable, and the output binary name +# to be generated after sections are eliminated. However, in case of using Arm Compiler For Embedded (ArmClang), +# all the image's code sections are extracted automatically in binary files using `fromelf` tool. +function(extract_sections_from_axf target) + set(multiValueArgs SECTIONS_NAMES) + set(oneValueArgs OUTPUT_BIN_NAME) + cmake_parse_arguments(PARSED "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(SECTORS_BIN_DIR ${CMAKE_BINARY_DIR}/application_sectors CACHE INTERNAL "Output sectors binaries directory") + file(MAKE_DIRECTORY ${SECTORS_BIN_DIR}) + + if(CMAKE_C_COMPILER_ID STREQUAL "GNU") + find_program(objcopy NAMES arm-none-eabi-objcopy objcopy REQUIRED) + list(LENGTH PARSED_SECTIONS_NAMES N_SECTIONS) + math(EXPR MAX_IDX "${N_SECTIONS} - 1") + + foreach(IDX RANGE ${MAX_IDX}) + list(GET PARSED_SECTIONS_NAMES ${IDX} SECTION_NAME) + + add_custom_command( + TARGET + ${target} + POST_BUILD + DEPENDS + $ + COMMAND + ${objcopy} -O binary + --only-section ${SECTION_NAME} + $ + ${SECTORS_BIN_DIR}/${SECTION_NAME} + COMMAND + ${objcopy} -O binary + --remove-section ${SECTION_NAME} + $ + ${SECTORS_BIN_DIR}/${PARSED_OUTPUT_BIN_NAME}.bin + ) + endforeach() + + elseif(CMAKE_C_COMPILER_ID STREQUAL "ARMClang") + find_program(fromelf NAMES fromelf REQUIRED) + add_custom_command( + TARGET + ${target} + DEPENDS + $ + POST_BUILD + COMMAND + ${fromelf} --bin + --output=${SECTORS_BIN_DIR}/ + $ + ) + endif() + +endfunction() From 6ea5a2c41385644ad92c9ce0da0ee052525850b0 Mon Sep 17 00:00:00 2001 From: Ahmed Ismail Date: Tue, 9 Apr 2024 15:42:20 +0100 Subject: [PATCH 3/4] ci: Add support for speech-recognition with software inference Signed-off-by: Ahmed Ismail --- .gitlab-ci.yml | 30 
++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ad9e48e2..0f0e55f4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -329,7 +329,7 @@ sw-vsi-configs-test: TOOLCHAIN: [ARMCLANG] - << : *pipeline_config_corstone315 - APP: [keyword-detection] + APP: [keyword-detection, speech-recognition] INFERENCE: [SOFTWARE] AUDIO: [ROM, VSI] TOOLCHAIN: [ARMCLANG] @@ -347,7 +347,7 @@ sw-vsi-configs-test: TOOLCHAIN: [ARMCLANG] - << : *pipeline_config_corstone310 - APP: [keyword-detection] + APP: [keyword-detection, speech-recognition] INFERENCE: [SOFTWARE] AUDIO: [ROM, VSI] TOOLCHAIN: [ARMCLANG] @@ -359,7 +359,7 @@ sw-vsi-configs-test: TOOLCHAIN: [ARMCLANG] - << : *pipeline_config_corstone300 - APP: [keyword-detection] + APP: [keyword-detection, speech-recognition] INFERENCE: [SOFTWARE] AUDIO: [ROM, VSI] TOOLCHAIN: [ARMCLANG] @@ -382,16 +382,10 @@ gnu-toolchain-test: matrix: - << : *pipeline_config_corstone315 - APP: [keyword-detection] + APP: [keyword-detection, speech-recognition] INFERENCE: [ETHOS, SOFTWARE] AUDIO: [ROM,VSI] TOOLCHAIN: [GNU] - - - << : *pipeline_config_corstone315 - APP: [speech-recognition] - INFERENCE: [ETHOS] - AUDIO: [ROM,VSI] - TOOLCHAIN: [GNU] - << : *pipeline_config_corstone315 APP: [object-detection] @@ -400,28 +394,16 @@ gnu-toolchain-test: TOOLCHAIN: [GNU] - << : *pipeline_config_corstone310 - APP: [keyword-detection] + APP: [keyword-detection, speech-recognition] INFERENCE: [ETHOS, SOFTWARE] AUDIO: [ROM,VSI] TOOLCHAIN: [GNU] - - - << : *pipeline_config_corstone310 - APP: [speech-recognition] - INFERENCE: [ETHOS] - AUDIO: [ROM,VSI] - TOOLCHAIN: [GNU] - << : *pipeline_config_corstone300 - APP: [keyword-detection] + APP: [keyword-detection, speech-recognition] INFERENCE: [ETHOS, SOFTWARE] AUDIO: [ROM,VSI] TOOLCHAIN: [GNU] - - - << : *pipeline_config_corstone300 - APP: [speech-recognition] - INFERENCE: [ETHOS] - AUDIO: [ROM,VSI] - TOOLCHAIN: [GNU] variables: 
GIT_SUBMODULE_STRATEGY: recursive From 639eec75c401d68742083151c7abdb00bb4cfecf Mon Sep 17 00:00:00 2001 From: Devaraj Ranganna Date: Mon, 15 Apr 2024 09:44:37 +0000 Subject: [PATCH 4/4] ci: Enable software inferencing in speech-recognition application Signed-off-by: Devaraj Ranganna --- .github/workflows/build.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9902313a..2e2cf3ff 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -26,8 +26,6 @@ jobs: audio: [ROM, VSI] inference: [ETHOS, SOFTWARE] exclude: - - application: speech-recognition - inference: SOFTWARE - application: object-detection audio: VSI steps: