From 72dce41976c57bd148466ceae9b7eb6d41651995 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 6 Sep 2024 12:38:12 +0800 Subject: [PATCH 01/25] upgrade xnn --- cgmanifests/generated/cgmanifest.json | 2 +- cmake/deps.txt | 3 +- .../xnnpack/AddEmscriptenAndIosSupport.patch | 34 +++++++++---------- .../core/providers/xnnpack/math/softmax.cc | 19 +++++------ .../core/providers/xnnpack/math/softmax.h | 1 + .../core/providers/xnnpack/nn/average_pool.cc | 8 ++--- .../core/providers/xnnpack/nn/average_pool.h | 1 + .../core/providers/xnnpack/nn/max_pool.cc | 5 ++- .../core/providers/xnnpack/tensor/resize.cc | 9 ++--- .../core/providers/xnnpack/xnnpack_kernel.h | 4 +-- 10 files changed, 41 insertions(+), 45 deletions(-) diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index f7c0159c1f0ab..4e3a85ac6b849 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -146,7 +146,7 @@ "component": { "type": "git", "git": { - "commitHash": "0da379fc4808f9601faef392352018c741c0f297", + "commitHash": "39FA5259EAEACE0547284B63D5CEDC4F05553F5A", "repositoryUrl": "https://github.com/google/XNNPACK.git" }, "comments": "googlexnnpack" diff --git a/cmake/deps.txt b/cmake/deps.txt index 2487ea144227d..cae28017cb47b 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -29,7 +29,8 @@ fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34 google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.8.5.zip;cd47d3d272faf353600c8cc2fdec2b52d6f69177 google_nsync;https://github.com/google/nsync/archive/refs/tags/1.26.0.zip;5e7c00ef6bf5b787386fc040067903ec774e2752 googletest;https://github.com/google/googletest/archive/refs/tags/v1.15.0.zip;9d2d0af8d77ac726ea55d44a8fa727ec98311349 -googlexnnpack;https://github.com/google/XNNPACK/archive/0da379fc4808f9601faef392352018c741c0f297.zip;663883491e380b628e0a5b162b5f2658032fae73 +#xnnpack 2024.09.04 +googlexnnpack;https://github.com/google/XNNPACK/archive/309b75c9e56e0a674bf78d59872ce131f814dfb6.zip;39FA5259EAEACE0547284B63D5CEDC4F05553F5A json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5 diff --git a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch index 736fffb1e384c..2c1d1e78c6dbf 100644 --- a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch +++ b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch @@ -1,8 +1,8 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index dba9b4687..a4345898d 100755 +index 1ff85b538..37abdd0a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -122,7 +122,7 @@ ENDIF() +@@ -253,7 +253,7 @@ ENDIF() # ---[ Build flags IF(NOT CMAKE_SYSTEM_NAME) MESSAGE(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined") @@ -11,29 +11,27 @@ index dba9b4687..a4345898d 100755 MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME value \"${CMAKE_SYSTEM_NAME}\"") ENDIF() IF(CMAKE_SYSTEM_NAME MATCHES "Windows") -@@ -534,7 +534,12 @@ IF(XNNPACK_BUILD_LIBRARY) - TARGET_LINK_LIBRARIES(operator-utils PRIVATE logging) - TARGET_LINK_LIBRARIES(post-operation PRIVATE logging) - TARGET_LINK_LIBRARIES(subgraph PRIVATE allocator logging memory mutex operators operator-run) -- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation microkernels-prod subgraph) +@@ -763,7 +763,12 @@ IF(XNNPACK_BUILD_LIBRARY) + TARGET_LINK_LIBRARIES(operator-run PRIVATE xnnpack-base logging) + TARGET_LINK_LIBRARIES(operator-utils PRIVATE xnnpack-base logging) + TARGET_LINK_LIBRARIES(subgraph PRIVATE xnnpack-base allocator logging memory mutex operators operator-run) +- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph) + IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") -+ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake -+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation subgraph) ++ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake ++ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation subgraph) + ELSE() -+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation microkernels-prod subgraph) -+ ENDIF() ++ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation microkernels-prod subgraph) ++ ENDIF() + TARGET_LINK_LIBRARIES(XNNPACK PUBLIC xnnpack-base) SET_TARGET_PROPERTIES(XNNPACK PROPERTIES C_EXTENSIONS YES) ENDIF() - IF(NOT MSVC) -@@ -543,8 +548,9 @@ ENDIF() +@@ -772,7 +777,8 @@ IF(NOT MSVC) + ENDIF() IF(XNNPACK_TARGET_PROCESSOR STREQUAL "arm") SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -marm ") - SET_PROPERTY(SOURCE ${PROD_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -marm ") - SET_PROPERTY(SOURCE ${ALL_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv6 -mfpu=vfp -munaligned-access ") -- SET_PROPERTY(SOURCE ${PROD_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv6 -mfpu=vfp -munaligned-access ") + # set this to armv7-a to workaround build issue. we don't target armv6 so it shouldn't matter -+ SET_PROPERTY(SOURCE ${ALL_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=vfp -munaligned-access ") -+ SET_PROPERTY(SOURCE ${PROD_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=vfp -munaligned-access ") ++ SET_PROPERTY(SOURCE ${ALL_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=vfp -munaligned-access ") SET_PROPERTY(SOURCE ${ALL_NEON_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon ") - SET_PROPERTY(SOURCE ${PROD_NEON_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon ") SET_PROPERTY(SOURCE ${ALL_NEONFP16_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon-fp16 ") + # GCC requires -mfp16-format=ieee to define __fp16 type, but Clang doesn't support this option at all. diff --git a/onnxruntime/core/providers/xnnpack/math/softmax.cc b/onnxruntime/core/providers/xnnpack/math/softmax.cc index 87440b7814176..19c18fa29d6e6 100644 --- a/onnxruntime/core/providers/xnnpack/math/softmax.cc +++ b/onnxruntime/core/providers/xnnpack/math/softmax.cc @@ -166,24 +166,21 @@ Softmax::Softmax(const OpKernelInfo& info) : XnnpackKernel{info} { if (op_type_ == OpComputeType::op_compute_type_qu8) { // the order of input tensor, x,x_scale, x_zp, y_scale, y_zp OpQuantParam quant_param = ParseQuantParamForOp(info, x_dtype, 1); - xstatus = xnn_create_softmax_nc_qu8(channels, - channels, - channels, - quant_param[0].first[0], // x_scale - quant_param[1].second, // y_zp - quant_param[1].first[0], // y_scale + xstatus = xnn_create_softmax_nc_qu8( + quant_param[0].first[0], // x_scale, input scale + quant_param[1].second, // y_zp, output zero point + quant_param[1].first[0], // y_scale, output scale 0, // flags, &p); } else if (op_type_ == OpComputeType::op_compute_type_fp32) { - xstatus = xnn_create_softmax_nc_f32(channels, - channels, - channels, + xstatus = xnn_create_softmax_nc_f32( 0, // flags, &p); } - ORT_ENFORCE(xstatus == xnn_status_success, "xnn_create_softmax_nc_", + ORT_ENFORCE(xstatus == xnn_status_success && p != nullptr, "xnn_create_softmax_nc_", OpTypeToString(op_type_), " failed. Status:", xstatus); + channel_dim_ = channels; op0_.reset(p); } @@ -205,7 +202,7 @@ Status Softmax::Compute(OpKernelContext* ctx) const { auto reshape_fn = op_type_ == OpComputeType::op_compute_type_qu8 ? xnn_reshape_softmax_nc_qu8 : xnn_reshape_softmax_nc_f32; - status = reshape_fn(op0_.get(), N, threadpool); + status = reshape_fn(op0_.get(), channel_dim_, channel_dim_, channel_dim_, N, threadpool); if (status != xnn_status_success) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_softmax_nc_", OpTypeToString(op_type_), diff --git a/onnxruntime/core/providers/xnnpack/math/softmax.h b/onnxruntime/core/providers/xnnpack/math/softmax.h index 8c6fba6c822a1..9a8055ff34a57 100644 --- a/onnxruntime/core/providers/xnnpack/math/softmax.h +++ b/onnxruntime/core/providers/xnnpack/math/softmax.h @@ -23,6 +23,7 @@ class Softmax final : public XnnpackKernel { int opset_; OpComputeType op_type_ = OpComputeType::op_compute_type_invalid; XnnpackOperator op0_; + int64_t channel_dim_; }; } // namespace xnnpack } // namespace onnxruntime diff --git a/onnxruntime/core/providers/xnnpack/nn/average_pool.cc b/onnxruntime/core/providers/xnnpack/nn/average_pool.cc index 58c209a13cd0c..c1fb6db93b0aa 100644 --- a/onnxruntime/core/providers/xnnpack/nn/average_pool.cc +++ b/onnxruntime/core/providers/xnnpack/nn/average_pool.cc @@ -15,7 +15,6 @@ namespace onnxruntime { namespace xnnpack { namespace { Status CreateXnnpackKernel(const PoolAttributes& pool_attrs, - int64_t C, const std::optional>& clip_min_max, struct xnn_operator*& p, const OpQuantParam& quant_param, @@ -42,7 +41,6 @@ Status CreateXnnpackKernel(const PoolAttributes& pool_attrs, input_padding_bottom, input_padding_left, pooling_height, pooling_width, stride_height, stride_width, - C, C, C, // channels, input_pixel_stride, output_pixel_stride foutput_min, foutput_max, flags, &p); } else if (avgpool_type == OpComputeType::op_compute_type_qu8) { const float output_scale = quant_param[1].first[0]; @@ -53,7 +51,6 @@ Status CreateXnnpackKernel(const PoolAttributes& pool_attrs, input_padding_bottom, input_padding_left, pooling_height, pooling_width, stride_height, stride_width, - C, C, C, // channels, input_pixel_stride, output_pixel_stride quant_param[0].second, quant_param[0].first[0], quant_param[1].second, @@ -209,10 +206,11 @@ AveragePool::AveragePool(const OpKernelInfo& info) ORT_THROW("unsupported AveragePool in XnnpackEP, we have FLOAT|UINT8, but got ", stype); } struct xnn_operator* p; - auto ret = CreateXnnpackKernel(pool_attrs_, C, clip_min_max_, p, + auto ret = CreateXnnpackKernel(pool_attrs_, clip_min_max_, p, quant_param, avgpool_type_); ORT_ENFORCE(ret.IsOK(), ret.ErrorMessage()); op0_.reset(p); + channels_ = C; } Status AveragePool::Compute(OpKernelContext* context) const { @@ -247,7 +245,7 @@ Status AveragePool::Compute(OpKernelContext* context) const { ? xnn_reshape_average_pooling2d_nhwc_f32 : xnn_reshape_average_pooling2d_nhwc_qu8; - auto status = reshape_fn(op0_.get(), N, H, W, + auto status = reshape_fn(op0_.get(), N, H, W, channels_, channels_, channels_, &workspace_size, &workspace_alignment, /*output_height_out=*/nullptr, /*output_width_out=*/nullptr, threadpool); diff --git a/onnxruntime/core/providers/xnnpack/nn/average_pool.h b/onnxruntime/core/providers/xnnpack/nn/average_pool.h index 1626fdc21bcbd..eb5f0b12b9e9b 100644 --- a/onnxruntime/core/providers/xnnpack/nn/average_pool.h +++ b/onnxruntime/core/providers/xnnpack/nn/average_pool.h @@ -30,6 +30,7 @@ class AveragePool : public XnnpackKernel { TensorShapeVector output_dims_; XnnpackOperator op0_; + int64_t channels_; std::optional> clip_min_max_; OpComputeType avgpool_type_ = OpComputeType::op_compute_type_invalid; }; diff --git a/onnxruntime/core/providers/xnnpack/nn/max_pool.cc b/onnxruntime/core/providers/xnnpack/nn/max_pool.cc index 2ef9f97f77b14..a7436ec40c477 100644 --- a/onnxruntime/core/providers/xnnpack/nn/max_pool.cc +++ b/onnxruntime/core/providers/xnnpack/nn/max_pool.cc @@ -172,7 +172,6 @@ MaxPool::MaxPool(const OpKernelInfo& info) pooling_height, pooling_width, stride_height, stride_width, dilation_height, dilation_width, - C, C, C, // channels, input_pixel_stride, output_pixel_stride foutput_min, foutput_max, flags, &p); } else if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_UINT8) { maxpool_type_ = OpComputeType::op_compute_type_qu8; @@ -183,7 +182,6 @@ MaxPool::MaxPool(const OpKernelInfo& info) pooling_height, pooling_width, stride_height, stride_width, dilation_height, dilation_width, - C, C, C, // channels, input_pixel_stride, output_pixel_stride output_min, output_max, flags, &p); } else if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_INT8) { maxpool_type_ = OpComputeType::op_compute_type_qs8; @@ -194,7 +192,6 @@ MaxPool::MaxPool(const OpKernelInfo& info) pooling_height, pooling_width, stride_height, stride_width, dilation_height, dilation_width, - C, C, C, // channels, input_pixel_stride, output_pixel_stride output_min, output_max, flags, &p); } else { auto stype = DataTypeImpl::ToString(DataTypeImpl::TypeFromProto(*X_arg.TypeAsProto())); @@ -213,6 +210,7 @@ Status MaxPool::Compute(OpKernelContext* context) const { int64_t N = X_shape[0]; int64_t H = X_shape[1]; int64_t W = X_shape[2]; + int64_t C = X_shape[3]; // set the N dim to the correct value TensorShapeVector output_dims{output_dims_}; @@ -234,6 +232,7 @@ Status MaxPool::Compute(OpKernelContext* context) const { } auto status = reshape_fn(op0_.get(), N, H, W, + C, C, C, // channels, input_pixel_stride, output_pixel_stride /*output_height_out=*/nullptr, /*output_width_out=*/nullptr, threadpool); if (status != xnn_status_success) { diff --git a/onnxruntime/core/providers/xnnpack/tensor/resize.cc b/onnxruntime/core/providers/xnnpack/tensor/resize.cc index cf874796ba169..16cc70b26d324 100644 --- a/onnxruntime/core/providers/xnnpack/tensor/resize.cc +++ b/onnxruntime/core/providers/xnnpack/tensor/resize.cc @@ -228,11 +228,11 @@ Resize::Resize(const OpKernelInfo& info) : UpsampleBase(info), XnnpackKernel{inf xnn_status xstatus = xnn_status_invalid_state; struct xnn_operator* p = nullptr; if (op_type_ == OpComputeType::op_compute_type_fp32) { - xstatus = xnn_create_resize_bilinear2d_nhwc_f32(channels, channels, channels, flags, &p); + xstatus = xnn_create_resize_bilinear2d_nhwc_f32(channels, channels,flags, &p); } else if (op_type_ == OpComputeType::op_compute_type_qu8) { - xstatus = xnn_create_resize_bilinear2d_nhwc_u8(channels, channels, channels, flags, &p); + xstatus = xnn_create_resize_bilinear2d_nhwc_u8(channels, channels, flags, &p); } else { - xstatus = xnn_create_resize_bilinear2d_nhwc_s8(channels, channels, channels, flags, &p); + xstatus = xnn_create_resize_bilinear2d_nhwc_s8(channels, channels, flags, &p); } ORT_ENFORCE(xstatus == xnn_status_success, "xnn_create_resize_bilinear2d_nhwc_", OpTypeToString(op_type_), " failed. Status:", @@ -248,6 +248,7 @@ Status Resize::ComputeInternal(OpKernelContext* ctx, const Tensor* input, auto N = X_shape[0]; auto H = X_shape[1]; auto W = X_shape[2]; + auto C = X_shape[3]; Tensor* output = ctx->Output(0, TensorShape(output_dims)); pthreadpool_t threadpool = GetThreadPool(); @@ -266,7 +267,7 @@ Status Resize::ComputeInternal(OpKernelContext* ctx, const Tensor* input, reshape_fn = xnn_reshape_resize_bilinear2d_nhwc_s8; } - auto status = reshape_fn(op0_.get(), N, H, W, output_dims[1], output_dims[2], + auto status = reshape_fn(op0_.get(), N, H, W, C, output_dims[1], output_dims[2], &workspace_size, &workspace_alignment, threadpool); if (status != xnn_status_success) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_resize_bilinear2d_nhwc_", OpTypeToString(op_type_), diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h b/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h index 0978a88288114..31512586be19d 100644 --- a/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h +++ b/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h @@ -48,7 +48,7 @@ class XnnpackKernel : public OpKernel { // auto_code_cache.reset(&code_cache_); #endif // status = xnn_init_weights_cache(&weights_cache_); - xnn_weights_cache_t weights_cache = nullptr; + xnn_weights_cache_t weights_cache_provider = nullptr; status = xnn_create_weights_cache(&weights_cache, 0); ORT_ENFORCE(status == xnn_status_success, "Failed to create XNNPACK weights cache"); auto_weights_cache.reset(weights_cache); @@ -57,7 +57,7 @@ class XnnpackKernel : public OpKernel { } // std::unique_ptr auto_code_cache; - std::unique_ptr auto_weights_cache; + std::unique_ptr auto_weights_cache; // private: // #if defined(XNN_CACHE_ENABLE) && XNN_PLATFORM_JIT From 71411f2641642778c373d3f20bff5f77846c5377 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 6 Sep 2024 15:01:25 +0800 Subject: [PATCH 02/25] v1.0.183 --- .../github/azure-pipelines/templates/download-deps.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml index 2b600d1be2d01..7ffcabd532b22 100644 --- a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml +++ b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml @@ -11,7 +11,7 @@ steps: packageType: upack feed: '/7424c8e4-5c62-490e-95c4-79446f31017c' definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0' - version: 1.0.178 + version: 1.0.183 downloadPath: $(Build.BinariesDirectory)/deps # The private ADO project @@ -22,7 +22,7 @@ steps: packageType: upack feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325' definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a' - version: 1.0.178 + version: 1.0.183 downloadPath: $(Build.BinariesDirectory)/deps # You can add more ADO accounts at here. From 2c518bedc47229b55e93027aa5408f1c432fa4ed Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 6 Sep 2024 15:23:37 +0800 Subject: [PATCH 03/25] update --- onnxruntime/core/providers/xnnpack/nn/average_pool.cc | 4 ++-- onnxruntime/core/providers/xnnpack/nn/average_pool.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/providers/xnnpack/nn/average_pool.cc b/onnxruntime/core/providers/xnnpack/nn/average_pool.cc index c1fb6db93b0aa..b31b5a94899bf 100644 --- a/onnxruntime/core/providers/xnnpack/nn/average_pool.cc +++ b/onnxruntime/core/providers/xnnpack/nn/average_pool.cc @@ -210,7 +210,6 @@ AveragePool::AveragePool(const OpKernelInfo& info) quant_param, avgpool_type_); ORT_ENFORCE(ret.IsOK(), ret.ErrorMessage()); op0_.reset(p); - channels_ = C; } Status AveragePool::Compute(OpKernelContext* context) const { @@ -220,6 +219,7 @@ Status AveragePool::Compute(OpKernelContext* context) const { int64_t N = X_shape[0]; int64_t H = X_shape[1]; int64_t W = X_shape[2]; + int64_t C = X_shape[3]; // set the N dim to the correct value TensorShapeVector output_dims{output_dims_}; @@ -245,7 +245,7 @@ Status AveragePool::Compute(OpKernelContext* context) const { ? xnn_reshape_average_pooling2d_nhwc_f32 : xnn_reshape_average_pooling2d_nhwc_qu8; - auto status = reshape_fn(op0_.get(), N, H, W, channels_, channels_, channels_, + auto status = reshape_fn(op0_.get(), N, H, W, C, C, C, &workspace_size, &workspace_alignment, /*output_height_out=*/nullptr, /*output_width_out=*/nullptr, threadpool); diff --git a/onnxruntime/core/providers/xnnpack/nn/average_pool.h b/onnxruntime/core/providers/xnnpack/nn/average_pool.h index eb5f0b12b9e9b..1626fdc21bcbd 100644 --- a/onnxruntime/core/providers/xnnpack/nn/average_pool.h +++ b/onnxruntime/core/providers/xnnpack/nn/average_pool.h @@ -30,7 +30,6 @@ class AveragePool : public XnnpackKernel { TensorShapeVector output_dims_; XnnpackOperator op0_; - int64_t channels_; std::optional> clip_min_max_; OpComputeType avgpool_type_ = OpComputeType::op_compute_type_invalid; }; From 92eb0ccc185bc0fa049f4b6a523088084c9981fb Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Sun, 8 Sep 2024 15:39:24 +0800 Subject: [PATCH 04/25] update patch --- cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch index 2c1d1e78c6dbf..3abf2d3afec42 100644 --- a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch +++ b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch @@ -1,5 +1,5 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index 1ff85b538..37abdd0a7 100644 +index 1ff85b538..c3ef2183f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -253,7 +253,7 @@ ENDIF() @@ -18,9 +18,9 @@ index 1ff85b538..37abdd0a7 100644 - TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph) + IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake -+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation subgraph) ++ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing subgraph) + ELSE() -+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation microkernels-prod subgraph) ++ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph) + ENDIF() TARGET_LINK_LIBRARIES(XNNPACK PUBLIC xnnpack-base) SET_TARGET_PROPERTIES(XNNPACK PROPERTIES C_EXTENSIONS YES) From 7fcd9a5cb751b9ada5f5413bb1f531812f6f49af Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Sun, 8 Sep 2024 16:30:41 +0800 Subject: [PATCH 05/25] add kleidiai for builing mac arm --- cgmanifests/generated/cgmanifest.json | 42 +++++-------------- cmake/deps.txt | 1 + .../templates/download-deps.yml | 4 +- 3 files changed, 14 insertions(+), 33 deletions(-) diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index d9e2c3ac5cff6..51f75cb6200f2 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -2,36 +2,6 @@ "$schema": "https://json.schemastore.org/component-detection-manifest.json", "Version": 1, "Registrations": [ - { - "component": { - "type": "git", - "git": { - "commitHash": "d52c46520124845b1e0e0525f2759299d840143f", - "repositoryUrl": "https://github.com/emscripten-core/emsdk.git" - }, - "comments": "git submodule at cmake/external/emsdk" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "7a2ed51a6b682a83e345ff49fc4cfd7ca47550db", - "repositoryUrl": "https://github.com/google/libprotobuf-mutator.git" - }, - "comments": "git submodule at cmake/external/libprotobuf-mutator" - } - }, - { - "component": { - "type": "git", - "git": { - "commitHash": "595228d99e3977ac27cb79d5963adda262af99ad", - "repositoryUrl": "https://github.com/onnx/onnx.git" - }, - "comments": "git submodule at cmake/external/onnx" - } - }, { "component": { "type": "git", @@ -146,7 +116,7 @@ "component": { "type": "git", "git": { - "commitHash": "39FA5259EAEACE0547284B63D5CEDC4F05553F5A", + "commitHash": "309b75c9e56e0a674bf78d59872ce131f814dfb6", "repositoryUrl": "https://github.com/google/XNNPACK.git" }, "comments": "googlexnnpack" @@ -212,6 +182,16 @@ "comments": "neural_speed" } }, + { + "component": { + "type": "git", + "git": { + "commitHash": "595228d99e3977ac27cb79d5963adda262af99ad", + "repositoryUrl": "https://github.com/onnx/onnx.git" + }, + "comments": "onnx" + } + }, { "component": { "type": "git", diff --git a/cmake/deps.txt b/cmake/deps.txt index 7e3dfa8eb7bab..342184bda2f0e 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -61,3 +61,4 @@ composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/arch directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.5.2.zip;11071a47594b20f00af09aad83e0d5203ccf6029 dawn;https://github.com/google/dawn/archive/511eb80847afe6bded34ec491a38d5d78ba2d604.zip;c493f5aca5586f6634e25d0121c85df71189fb99 +kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/v0.2.0/kleidiai-v0.2.0.zip;B1E3173992FD91F20DB904AB77D6E901778C2681 diff --git a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml index 7ffcabd532b22..cbba1cb8ba8bd 100644 --- a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml +++ b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml @@ -11,7 +11,7 @@ steps: packageType: upack feed: '/7424c8e4-5c62-490e-95c4-79446f31017c' definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0' - version: 1.0.183 + version: 1.0.184 downloadPath: $(Build.BinariesDirectory)/deps # The private ADO project @@ -22,7 +22,7 @@ steps: packageType: upack feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325' definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a' - version: 1.0.183 + version: 1.0.184 downloadPath: $(Build.BinariesDirectory)/deps # You can add more ADO accounts at here. From 3c8853d434b59076b40418fd689f96f3257203bd Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Sun, 8 Sep 2024 17:26:02 +0800 Subject: [PATCH 06/25] download kleidiai --- cmake/external/xnnpack.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 41f02ce6f22bc..6015f05377348 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -39,6 +39,10 @@ set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include) set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool) +if(onnxruntime_target_platform MATCHES "ARM-like") + FetchContent_Declare(KleidiAI URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) + onnxruntime_fetchcontent_makeavailable(kleidiai) + set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} kleidiai) # the XNNPACK CMake setup doesn't include the WASM kernels so we have to manually set those up if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") From c41b40dd5a7b10b10eb13459211f270980f84c69 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Sun, 8 Sep 2024 17:26:43 +0800 Subject: [PATCH 07/25] one message --- cmake/external/xnnpack.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 6015f05377348..87415373de52d 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -42,6 +42,7 @@ set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool) if(onnxruntime_target_platform MATCHES "ARM-like") FetchContent_Declare(KleidiAI URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) onnxruntime_fetchcontent_makeavailable(kleidiai) + message(STATUS, "Adding KleidiAI to XNNPACK") set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} kleidiai) # the XNNPACK CMake setup doesn't include the WASM kernels so we have to manually set those up From 679adb1f115406f1379dab1ccc6622eaadece222 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Sun, 8 Sep 2024 17:39:12 +0800 Subject: [PATCH 08/25] try kleidiai package --- cmake/external/xnnpack.cmake | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 87415373de52d..a0493d4ef11e7 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -37,13 +37,16 @@ onnxruntime_fetchcontent_makeavailable(googlexnnpack) set(XNNPACK_DIR ${googlexnnpack_SOURCE_DIR}) set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include) -set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool) +FetchContent_Declare(KleidiAI URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) +onnxruntime_fetchcontent_makeavailable(kleidiai) -if(onnxruntime_target_platform MATCHES "ARM-like") - FetchContent_Declare(KleidiAI URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) - onnxruntime_fetchcontent_makeavailable(kleidiai) - message(STATUS, "Adding KleidiAI to XNNPACK") - set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} kleidiai) +set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool kleidiai) + +#if(onnxruntime_target_platform MATCHES "ARM-like") +# FetchContent_Declare(KleidiAI URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) +# onnxruntime_fetchcontent_makeavailable(kleidiai) +# message(STATUS, "Adding KleidiAI to XNNPACK") +# set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} kleidiai) # the XNNPACK CMake setup doesn't include the WASM kernels so we have to manually set those up if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") From aa589269a996bbe1543f032feabd8530881fb8f4 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Sun, 8 Sep 2024 17:45:56 +0800 Subject: [PATCH 09/25] update --- cmake/external/xnnpack.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index a0493d4ef11e7..17a44f057edba 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -30,6 +30,9 @@ set(FXDIV_SOURCE_DIR ${fxdiv_SOURCE_DIR}) FetchContent_Declare(pthreadpool URL ${DEP_URL_pthreadpool} URL_HASH SHA1=${DEP_SHA1_pthreadpool}) onnxruntime_fetchcontent_makeavailable(pthreadpool) +FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) +onnxruntime_fetchcontent_makeavailable(kleidiai) + FetchContent_Declare(googlexnnpack URL ${DEP_URL_googlexnnpack} URL_HASH SHA1=${DEP_SHA1_googlexnnpack} PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/xnnpack/AddEmscriptenAndIosSupport.patch ) @@ -37,9 +40,6 @@ onnxruntime_fetchcontent_makeavailable(googlexnnpack) set(XNNPACK_DIR ${googlexnnpack_SOURCE_DIR}) set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include) -FetchContent_Declare(KleidiAI URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) -onnxruntime_fetchcontent_makeavailable(kleidiai) - set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool kleidiai) #if(onnxruntime_target_platform MATCHES "ARM-like") From c31b77926ce6520fe5a2e50684dc52de8243bb32 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 9 Sep 2024 09:04:59 +0800 Subject: [PATCH 10/25] fix lint issue --- onnxruntime/core/providers/xnnpack/math/softmax.cc | 14 +++++++------- onnxruntime/core/providers/xnnpack/nn/max_pool.cc | 2 +- .../core/providers/xnnpack/tensor/resize.cc | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/providers/xnnpack/math/softmax.cc b/onnxruntime/core/providers/xnnpack/math/softmax.cc index 19c18fa29d6e6..0dc1e69140138 100644 --- a/onnxruntime/core/providers/xnnpack/math/softmax.cc +++ b/onnxruntime/core/providers/xnnpack/math/softmax.cc @@ -167,15 +167,15 @@ Softmax::Softmax(const OpKernelInfo& info) : XnnpackKernel{info} { // the order of input tensor, x,x_scale, x_zp, y_scale, y_zp OpQuantParam quant_param = ParseQuantParamForOp(info, x_dtype, 1); xstatus = xnn_create_softmax_nc_qu8( - quant_param[0].first[0], // x_scale, input scale - quant_param[1].second, // y_zp, output zero point - quant_param[1].first[0], // y_scale, output scale - 0, // flags, - &p); + quant_param[0].first[0], // x_scale, input scale + quant_param[1].second, // y_zp, output zero point + quant_param[1].first[0], // y_scale, output scale + 0, // flags, + &p); } else if (op_type_ == OpComputeType::op_compute_type_fp32) { xstatus = xnn_create_softmax_nc_f32( - 0, // flags, - &p); + 0, // flags, + &p); } ORT_ENFORCE(xstatus == xnn_status_success && p != nullptr, "xnn_create_softmax_nc_", diff --git a/onnxruntime/core/providers/xnnpack/nn/max_pool.cc b/onnxruntime/core/providers/xnnpack/nn/max_pool.cc index a7436ec40c477..0f0b827974f66 100644 --- a/onnxruntime/core/providers/xnnpack/nn/max_pool.cc +++ b/onnxruntime/core/providers/xnnpack/nn/max_pool.cc @@ -232,7 +232,7 @@ Status MaxPool::Compute(OpKernelContext* context) const { } auto status = reshape_fn(op0_.get(), N, H, W, - C, C, C, // channels, input_pixel_stride, output_pixel_stride + C, C, C, // channels, input_pixel_stride, output_pixel_stride /*output_height_out=*/nullptr, /*output_width_out=*/nullptr, threadpool); if (status != xnn_status_success) { diff --git a/onnxruntime/core/providers/xnnpack/tensor/resize.cc b/onnxruntime/core/providers/xnnpack/tensor/resize.cc index 16cc70b26d324..ec92b2bc86b5f 100644 --- a/onnxruntime/core/providers/xnnpack/tensor/resize.cc +++ b/onnxruntime/core/providers/xnnpack/tensor/resize.cc @@ -228,7 +228,7 @@ Resize::Resize(const OpKernelInfo& info) : UpsampleBase(info), XnnpackKernel{inf xnn_status xstatus = xnn_status_invalid_state; struct xnn_operator* p = nullptr; if (op_type_ == OpComputeType::op_compute_type_fp32) { - xstatus = xnn_create_resize_bilinear2d_nhwc_f32(channels, channels,flags, &p); + xstatus = xnn_create_resize_bilinear2d_nhwc_f32(channels, channels, flags, &p); } else if (op_type_ == OpComputeType::op_compute_type_qu8) { xstatus = xnn_create_resize_bilinear2d_nhwc_u8(channels, channels, flags, &p); } else { From d6e54b7a12971564891781ef2ccc353624816ff9 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 9 Sep 2024 10:13:31 +0800 Subject: [PATCH 11/25] kleidiai only for arm64 --- cmake/external/xnnpack.cmake | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 17a44f057edba..9055a985559c7 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -30,8 +30,12 @@ set(FXDIV_SOURCE_DIR ${fxdiv_SOURCE_DIR}) FetchContent_Declare(pthreadpool URL ${DEP_URL_pthreadpool} URL_HASH SHA1=${DEP_SHA1_pthreadpool}) onnxruntime_fetchcontent_makeavailable(pthreadpool) -FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) -onnxruntime_fetchcontent_makeavailable(kleidiai) +# https://github.com/google/XNNPACK/blob/3b3f7b8a6668f6ab3b6ce33b9f1d1fce971549d1/CMakeLists.txt#L206C82-L206C117 +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") + FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) + onnxruntime_fetchcontent_makeavailable(kleidiai) +endif() + FetchContent_Declare(googlexnnpack URL ${DEP_URL_googlexnnpack} URL_HASH SHA1=${DEP_SHA1_googlexnnpack} PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/xnnpack/AddEmscriptenAndIosSupport.patch @@ -40,13 +44,10 @@ onnxruntime_fetchcontent_makeavailable(googlexnnpack) set(XNNPACK_DIR ${googlexnnpack_SOURCE_DIR}) set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include) -set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool kleidiai) - -#if(onnxruntime_target_platform MATCHES "ARM-like") -# FetchContent_Declare(KleidiAI URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) -# onnxruntime_fetchcontent_makeavailable(kleidiai) -# message(STATUS, "Adding KleidiAI to XNNPACK") -# set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} kleidiai) +set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") + list(APPEND onnxruntime_EXTERNAL_LIBRARIES_XNNPACK kleidiai) +endif() # the XNNPACK CMake setup doesn't include the WASM kernels so we have to manually set those up if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") From c46fc52208393ec51048c69de60a0b7ee5d69079 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 9 Sep 2024 11:09:15 +0800 Subject: [PATCH 12/25] update cgmanifest --- cgmanifests/generated/cgmanifest.json | 40 ++++++++++++++++++++------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index 51f75cb6200f2..654099958b21b 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -2,6 +2,36 @@ "$schema": "https://json.schemastore.org/component-detection-manifest.json", "Version": 1, "Registrations": [ + { + "component": { + "type": "git", + "git": { + "commitHash": "d52c46520124845b1e0e0525f2759299d840143f", + "repositoryUrl": "https://github.com/emscripten-core/emsdk.git" + }, + "comments": "git submodule at cmake/external/emsdk" + } + }, + { + "component": { + "type": "git", + "git": { + "commitHash": "7a2ed51a6b682a83e345ff49fc4cfd7ca47550db", + "repositoryUrl": "https://github.com/google/libprotobuf-mutator.git" + }, + "comments": "git submodule at cmake/external/libprotobuf-mutator" + } + }, + { + "component": { + "type": "git", + "git": { + "commitHash": "595228d99e3977ac27cb79d5963adda262af99ad", + "repositoryUrl": "https://github.com/onnx/onnx.git" + }, + "comments": "git submodule at cmake/external/onnx" + } + }, { "component": { "type": "git", @@ -182,16 +212,6 @@ "comments": "neural_speed" } }, - { - "component": { - "type": "git", - "git": { - "commitHash": "595228d99e3977ac27cb79d5963adda262af99ad", - "repositoryUrl": "https://github.com/onnx/onnx.git" - }, - "comments": "onnx" - } - }, { "component": { "type": "git", From 120fc598053ab72ee3b41ffbbf600f36e0b7a1a3 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 10 Sep 2024 12:26:49 +0800 Subject: [PATCH 13/25] update resize op --- onnxruntime/core/providers/xnnpack/tensor/resize.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/xnnpack/tensor/resize.cc b/onnxruntime/core/providers/xnnpack/tensor/resize.cc index ec92b2bc86b5f..db5648d5d6e54 100644 --- a/onnxruntime/core/providers/xnnpack/tensor/resize.cc +++ b/onnxruntime/core/providers/xnnpack/tensor/resize.cc @@ -214,8 +214,6 @@ Resize::Resize(const OpKernelInfo& info) : UpsampleBase(info), XnnpackKernel{inf } } - int64_t channels = x_shape->dim(3).dim_value(); - uint32_t flags = 0; ORT_ENFORCE(mode_ == UpsampleMode::LINEAR, "only support bilinear resize"); if (coordinate_transform_mode_ == ResizeCoordinateTransformationMode::ALIGN_CORNERS) { @@ -227,12 +225,14 @@ Resize::Resize(const OpKernelInfo& info) : UpsampleBase(info), XnnpackKernel{inf xnn_status xstatus = xnn_status_invalid_state; struct xnn_operator* p = nullptr; + auto out_h = output_dims_[1]; + auto out_w = output_dims_[2]; if (op_type_ == OpComputeType::op_compute_type_fp32) { - xstatus = xnn_create_resize_bilinear2d_nhwc_f32(channels, channels, flags, &p); + xstatus = xnn_create_resize_bilinear2d_nhwc_f32(out_h, out_w, flags, &p); } else if (op_type_ == OpComputeType::op_compute_type_qu8) { - xstatus = xnn_create_resize_bilinear2d_nhwc_u8(channels, channels, flags, &p); + xstatus = xnn_create_resize_bilinear2d_nhwc_u8(out_h, out_w, flags, &p); } else { - xstatus = xnn_create_resize_bilinear2d_nhwc_s8(channels, channels, flags, &p); + xstatus = xnn_create_resize_bilinear2d_nhwc_s8(out_h, out_w, flags, &p); } ORT_ENFORCE(xstatus == xnn_status_success, "xnn_create_resize_bilinear2d_nhwc_", OpTypeToString(op_type_), " failed. Status:", @@ -267,7 +267,7 @@ Status Resize::ComputeInternal(OpKernelContext* ctx, const Tensor* input, reshape_fn = xnn_reshape_resize_bilinear2d_nhwc_s8; } - auto status = reshape_fn(op0_.get(), N, H, W, C, output_dims[1], output_dims[2], + auto status = reshape_fn(op0_.get(), N, H, W, C, C, C, &workspace_size, &workspace_alignment, threadpool); if (status != xnn_status_success) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_resize_bilinear2d_nhwc_", OpTypeToString(op_type_), From 2090dbb4b8d695f1db2d84fece3bd00c446d1f26 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 10 Sep 2024 14:24:35 +0800 Subject: [PATCH 14/25] no cache in iOS --- tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml index 48d48156fe913..921974f0d0cf4 100644 --- a/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml @@ -43,7 +43,7 @@ jobs: xcodeVersion: 14.2 - template: templates/mac-build-step-with-cache.yml parameters: - WithCache: true + WithCache: False Today: $(TODAY) AdditionalKey: onnxruntime CacheDir: $(ORT_CACHE_DIR) From f35c866c03e6d3ba062c899d3585e9f2f95f5a7a Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 10 Sep 2024 15:14:11 +0800 Subject: [PATCH 15/25] update --- cmake/external/xnnpack.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 9055a985559c7..aab5d31f9e7df 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -34,6 +34,7 @@ onnxruntime_fetchcontent_makeavailable(pthreadpool) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) onnxruntime_fetchcontent_makeavailable(kleidiai) + set(KLEIDIAI_SOURCE_DIR ${kleidiai_SOURCE_DIR}) endif() From 2936f7c45fa40e86f64385b17a2d0bfa5937b6c0 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 10 Sep 2024 17:20:14 +0800 Subject: [PATCH 16/25] Revert "no cache in iOS" This reverts commit 2090dbb4b8d695f1db2d84fece3bd00c446d1f26. --- tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml index 921974f0d0cf4..48d48156fe913 100644 --- a/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml @@ -43,7 +43,7 @@ jobs: xcodeVersion: 14.2 - template: templates/mac-build-step-with-cache.yml parameters: - WithCache: False + WithCache: true Today: $(TODAY) AdditionalKey: onnxruntime CacheDir: $(ORT_CACHE_DIR) From 1c0f420a4479acc1c8c641dbe94fd158def94e34 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Wed, 11 Sep 2024 11:32:13 +0800 Subject: [PATCH 17/25] rm kleidia test program --- cmake/external/xnnpack.cmake | 4 +- cmake/patches/kleidiai/NoTestProjects.patch | 66 +++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 cmake/patches/kleidiai/NoTestProjects.patch diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index aab5d31f9e7df..386aa574fc400 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -32,7 +32,9 @@ onnxruntime_fetchcontent_makeavailable(pthreadpool) # https://github.com/google/XNNPACK/blob/3b3f7b8a6668f6ab3b6ce33b9f1d1fce971549d1/CMakeLists.txt#L206C82-L206C117 if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") - FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) + FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai} + PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/kleidiai/NoTestProjects.patch + ) onnxruntime_fetchcontent_makeavailable(kleidiai) set(KLEIDIAI_SOURCE_DIR ${kleidiai_SOURCE_DIR}) endif() diff --git a/cmake/patches/kleidiai/NoTestProjects.patch b/cmake/patches/kleidiai/NoTestProjects.patch new file mode 100644 index 0000000000000..c5b84c26fee28 --- /dev/null +++ b/cmake/patches/kleidiai/NoTestProjects.patch @@ -0,0 +1,66 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 3e78307..91bd676 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -150,61 +150,6 @@ target_compile_options(kleidiai + PRIVATE ${KLEIDIAI_WARNING_FLAGS} + ) + +-if(KLEIDIAI_BUILD_TESTS) +- enable_testing() +- include(GoogleTest) +- +- add_library(kleidiai_test_framework +- test/common/data_type.cpp +- test/common/data_format.cpp +- test/common/printer.cpp +- test/common/int4.cpp +- test/common/compare.cpp +- test/common/matrix_portion.cpp +- test/common/rect.cpp +- test/common/round.cpp +- test/common/bfloat16.cpp +- test/common/float16.cpp +- test/common/cpu_info.cpp +- test/common/sme.cpp +- +- test/reference/binary_elementwise.cpp +- test/reference/matmul.cpp +- test/reference/fill.cpp +- test/reference/pack.cpp +- test/reference/quantize.cpp +- test/reference/reduce.cpp +- test/reference/transpose.cpp +- test/reference/cast.cpp +- ) +- +- target_compile_options(kleidiai_test_framework +- PUBLIC ${KLEIDIAI_WARNING_FLAGS} +- PUBLIC -march=armv8.2-a+fp16+bf16 +- ) +- +- set_source_files_properties(test/common/sme.cpp PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+sve) +- +- target_link_libraries(kleidiai_test_framework +- PUBLIC kleidiai +- ) +- +- add_executable(kleidiai_test +- test/tests/matmul_test.cpp +- test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp +- test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp +- ) +- +- target_link_libraries(kleidiai_test +- PRIVATE kleidiai_test_framework +- PRIVATE GTest::gtest_main +- ) +- +- # Cross-compiling is a common use case which creates a conflict if DISCOVERY_MODE is set to POST_BUILD (by default) +- # since the host platform does not match the target. Setting the mode to PRE_TEST avoids this conflict. +- gtest_discover_tests(kleidiai_test DISCOVERY_MODE PRE_TEST) +-endif() +- + if(KLEIDIAI_BUILD_BENCHMARK) + # https://github.com/google/benchmark/issues/351 + if(NOT (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM")) From 93790c85fd8049ace20b1435de1aee297537ffb5 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Sat, 14 Sep 2024 11:00:47 +0800 Subject: [PATCH 18/25] disable kleidiai test and benchmark --- cmake/external/xnnpack.cmake | 6 +- cmake/patches/kleidiai/NoTestProjects.patch | 66 --------------------- 2 files changed, 3 insertions(+), 69 deletions(-) delete mode 100644 cmake/patches/kleidiai/NoTestProjects.patch diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 386aa574fc400..fe1047430e76a 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -5,6 +5,8 @@ set(FP16_BUILD_TESTS OFF CACHE INTERNAL "") set(FP16_BUILD_BENCHMARKS OFF CACHE INTERNAL "") set(PTHREADPOOL_BUILD_TESTS OFF CACHE INTERNAL "") set(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE INTERNAL "") +set(KLEIDIAI_BUILD_TESTS OFF CACHE INTERNAL "") +set(KLEIDIAI_BUILD_BENCHMARK OFF CACHE INTERNAL "") if(CMAKE_SYSTEM_PROCESSOR MATCHES "^riscv64.*") set(XNNPACK_USE_SYSTEM_LIBS OFF) @@ -32,9 +34,7 @@ onnxruntime_fetchcontent_makeavailable(pthreadpool) # https://github.com/google/XNNPACK/blob/3b3f7b8a6668f6ab3b6ce33b9f1d1fce971549d1/CMakeLists.txt#L206C82-L206C117 if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") - FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai} - PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/kleidiai/NoTestProjects.patch - ) + FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) onnxruntime_fetchcontent_makeavailable(kleidiai) set(KLEIDIAI_SOURCE_DIR ${kleidiai_SOURCE_DIR}) endif() diff --git a/cmake/patches/kleidiai/NoTestProjects.patch b/cmake/patches/kleidiai/NoTestProjects.patch deleted file mode 100644 index c5b84c26fee28..0000000000000 --- a/cmake/patches/kleidiai/NoTestProjects.patch +++ /dev/null @@ -1,66 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 3e78307..91bd676 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -150,61 +150,6 @@ target_compile_options(kleidiai - PRIVATE ${KLEIDIAI_WARNING_FLAGS} - ) - --if(KLEIDIAI_BUILD_TESTS) -- enable_testing() -- include(GoogleTest) -- -- add_library(kleidiai_test_framework -- test/common/data_type.cpp -- test/common/data_format.cpp -- test/common/printer.cpp -- test/common/int4.cpp -- test/common/compare.cpp -- test/common/matrix_portion.cpp -- test/common/rect.cpp -- test/common/round.cpp -- test/common/bfloat16.cpp -- test/common/float16.cpp -- test/common/cpu_info.cpp -- test/common/sme.cpp -- -- test/reference/binary_elementwise.cpp -- test/reference/matmul.cpp -- test/reference/fill.cpp -- test/reference/pack.cpp -- test/reference/quantize.cpp -- test/reference/reduce.cpp -- test/reference/transpose.cpp -- test/reference/cast.cpp -- ) -- -- target_compile_options(kleidiai_test_framework -- PUBLIC ${KLEIDIAI_WARNING_FLAGS} -- PUBLIC -march=armv8.2-a+fp16+bf16 -- ) -- -- set_source_files_properties(test/common/sme.cpp PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+sve) -- -- target_link_libraries(kleidiai_test_framework -- PUBLIC kleidiai -- ) -- -- add_executable(kleidiai_test -- test/tests/matmul_test.cpp -- test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp -- test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp -- ) -- -- target_link_libraries(kleidiai_test -- PRIVATE kleidiai_test_framework -- PRIVATE GTest::gtest_main -- ) -- -- # Cross-compiling is a common use case which creates a conflict if DISCOVERY_MODE is set to POST_BUILD (by default) -- # since the host platform does not match the target. Setting the mode to PRE_TEST avoids this conflict. -- gtest_discover_tests(kleidiai_test DISCOVERY_MODE PRE_TEST) --endif() -- - if(KLEIDIAI_BUILD_BENCHMARK) - # https://github.com/google/benchmark/issues/351 - if(NOT (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM")) From 465c89b3e558846457b5dac0610db03e6a1847a1 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Sat, 14 Sep 2024 22:56:48 +0800 Subject: [PATCH 19/25] add microkernes-prod --- cmake/external/xnnpack.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index fe1047430e76a..040b9ea4de541 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -47,7 +47,7 @@ onnxruntime_fetchcontent_makeavailable(googlexnnpack) set(XNNPACK_DIR ${googlexnnpack_SOURCE_DIR}) set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include) -set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool) +set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK microkernels-prod pthreadpool) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") list(APPEND onnxruntime_EXTERNAL_LIBRARIES_XNNPACK kleidiai) endif() From 57c246cd5a62091a1a388fb4dcef2b4205231347 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Sep 2024 09:58:43 +0800 Subject: [PATCH 20/25] add ORT_TARGET_PROCESSOR --- cmake/CMakeLists.txt | 41 ++++++++++++++++++++++++++++++++++++ cmake/external/xnnpack.cmake | 4 ++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 2c8fb4824d94a..460158146a41a 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -572,6 +572,47 @@ if (CMAKE_CROSSCOMPILING) message("Doing crosscompiling") endif() +# --- [ Determine target processor +IF(CMAKE_OSX_ARCHITECTURES) + LIST(LENGTH CMAKE_OSX_ARCHITECTURES CMAKE_OSX_ARCHITECTURES_COUNT) + IF(CMAKE_OSX_ARCHITECTURES_COUNT GREATER 1) + MESSAGE(FATAL_ERROR "Unsupported Onnxruntime build with multiple OSX architectures (${CMAKE_OSX_ARCHITECTURES}). " + "Specify a single architecture in CMAKE_OSX_ARCHITECTURES and re-configure. ") + ENDIF() + IF(NOT CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64|arm64e|arm64_32)$") + MESSAGE(FATAL_ERROR "Unrecognized CMAKE_OSX_ARCHITECTURES value \"${CMAKE_OSX_ARCHITECTURES}\"") + ENDIF() + SET(ORT_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}") + ADD_COMPILE_OPTIONS("-Wno-shorten-64-to-32") +ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_GENERATOR_PLATFORM) + IF(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32") + SET(ORT_TARGET_PROCESSOR "x86") + ELSEIF(CMAKE_GENERATOR_PLATFORM STREQUAL "x64") + SET(ORT_TARGET_PROCESSOR "x86_64") + ELSEIF(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64") + SET(ORT_TARGET_PROCESSOR "arm64") + ELSEIF(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64EC") + SET(ORT_TARGET_PROCESSOR "arm64") + ELSE() + MESSAGE(FATAL_ERROR "Unsupported Visual Studio architecture \"${CMAKE_GENERATOR_PLATFORM}\"") + ENDIF() +ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^i[3-7]86$") + SET(ORT_TARGET_PROCESSOR "x86") +ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64") + SET(ORT_TARGET_PROCESSOR "x86_64") +ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]") + SET(ORT_TARGET_PROCESSOR "arm") +ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + SET(ORT_TARGET_PROCESSOR "arm64") +ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le") + SET(ORT_TARGET_PROCESSOR "ppc64") +ELSEIF(NOT ORT_TARGET_PROCESSOR MATCHES "^(x86(_64)?|arm64|riscv(32|64|128)|Hexagon|ppc64)$") + SET(ORT_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}") +ELSE() + MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_PROCESSOR value \"${CMAKE_SYSTEM_PROCESSOR}\"") +ENDIF() +MESSAGE(STATUS "Building for ORT_TARGET_PROCESSOR: ${ORT_TARGET_PROCESSOR}") + #Need python to generate def file if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_ENABLE_PYTHON) if (onnxruntime_ENABLE_PYTHON) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 040b9ea4de541..999ebdbfbfe17 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -33,7 +33,7 @@ FetchContent_Declare(pthreadpool URL ${DEP_URL_pthreadpool} URL_HASH SHA1=${DEP_ onnxruntime_fetchcontent_makeavailable(pthreadpool) # https://github.com/google/XNNPACK/blob/3b3f7b8a6668f6ab3b6ce33b9f1d1fce971549d1/CMakeLists.txt#L206C82-L206C117 -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") +if(ORT_TARGET_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) onnxruntime_fetchcontent_makeavailable(kleidiai) set(KLEIDIAI_SOURCE_DIR ${kleidiai_SOURCE_DIR}) @@ -48,7 +48,7 @@ set(XNNPACK_DIR ${googlexnnpack_SOURCE_DIR}) set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include) set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK microkernels-prod pthreadpool) -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") +if(ORT_TARGET_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") list(APPEND onnxruntime_EXTERNAL_LIBRARIES_XNNPACK kleidiai) endif() From 5b544e4e4cc044fd524e56384f04ccb747a4893e Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Sep 2024 21:09:02 +0800 Subject: [PATCH 21/25] VS Platform str --- cmake/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 460158146a41a..744202036bc9b 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -585,13 +585,13 @@ IF(CMAKE_OSX_ARCHITECTURES) SET(ORT_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}") ADD_COMPILE_OPTIONS("-Wno-shorten-64-to-32") ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_GENERATOR_PLATFORM) - IF(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32") + IF(CMAKE_GENERATOR_PLATFORM MATCHES "^Win32") SET(ORT_TARGET_PROCESSOR "x86") - ELSEIF(CMAKE_GENERATOR_PLATFORM STREQUAL "x64") + ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^x64") SET(ORT_TARGET_PROCESSOR "x86_64") - ELSEIF(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64") + ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^ARM64") SET(ORT_TARGET_PROCESSOR "arm64") - ELSEIF(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64EC") + ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^ARM64EC") SET(ORT_TARGET_PROCESSOR "arm64") ELSE() MESSAGE(FATAL_ERROR "Unsupported Visual Studio architecture \"${CMAKE_GENERATOR_PLATFORM}\"") From fcedf50681cbb960794730610feb577f5556542d Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Sep 2024 21:33:46 +0800 Subject: [PATCH 22/25] update --- cmake/CMakeLists.txt | 41 -------------------------------- cmake/external/xnnpack.cmake | 45 ++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 744202036bc9b..2c8fb4824d94a 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -572,47 +572,6 @@ if (CMAKE_CROSSCOMPILING) message("Doing crosscompiling") endif() -# --- [ Determine target processor -IF(CMAKE_OSX_ARCHITECTURES) - LIST(LENGTH CMAKE_OSX_ARCHITECTURES CMAKE_OSX_ARCHITECTURES_COUNT) - IF(CMAKE_OSX_ARCHITECTURES_COUNT GREATER 1) - MESSAGE(FATAL_ERROR "Unsupported Onnxruntime build with multiple OSX architectures (${CMAKE_OSX_ARCHITECTURES}). " - "Specify a single architecture in CMAKE_OSX_ARCHITECTURES and re-configure. ") - ENDIF() - IF(NOT CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64|arm64e|arm64_32)$") - MESSAGE(FATAL_ERROR "Unrecognized CMAKE_OSX_ARCHITECTURES value \"${CMAKE_OSX_ARCHITECTURES}\"") - ENDIF() - SET(ORT_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}") - ADD_COMPILE_OPTIONS("-Wno-shorten-64-to-32") -ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_GENERATOR_PLATFORM) - IF(CMAKE_GENERATOR_PLATFORM MATCHES "^Win32") - SET(ORT_TARGET_PROCESSOR "x86") - ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^x64") - SET(ORT_TARGET_PROCESSOR "x86_64") - ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^ARM64") - SET(ORT_TARGET_PROCESSOR "arm64") - ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^ARM64EC") - SET(ORT_TARGET_PROCESSOR "arm64") - ELSE() - MESSAGE(FATAL_ERROR "Unsupported Visual Studio architecture \"${CMAKE_GENERATOR_PLATFORM}\"") - ENDIF() -ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^i[3-7]86$") - SET(ORT_TARGET_PROCESSOR "x86") -ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64") - SET(ORT_TARGET_PROCESSOR "x86_64") -ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]") - SET(ORT_TARGET_PROCESSOR "arm") -ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") - SET(ORT_TARGET_PROCESSOR "arm64") -ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le") - SET(ORT_TARGET_PROCESSOR "ppc64") -ELSEIF(NOT ORT_TARGET_PROCESSOR MATCHES "^(x86(_64)?|arm64|riscv(32|64|128)|Hexagon|ppc64)$") - SET(ORT_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}") -ELSE() - MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_PROCESSOR value \"${CMAKE_SYSTEM_PROCESSOR}\"") -ENDIF() -MESSAGE(STATUS "Building for ORT_TARGET_PROCESSOR: ${ORT_TARGET_PROCESSOR}") - #Need python to generate def file if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_ENABLE_PYTHON) if (onnxruntime_ENABLE_PYTHON) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index 999ebdbfbfe17..a652a6604653b 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -32,6 +32,51 @@ set(FXDIV_SOURCE_DIR ${fxdiv_SOURCE_DIR}) FetchContent_Declare(pthreadpool URL ${DEP_URL_pthreadpool} URL_HASH SHA1=${DEP_SHA1_pthreadpool}) onnxruntime_fetchcontent_makeavailable(pthreadpool) +# --- Determine target processor +# Why ORT_TARGET_PROCESSOR is only for XNNPACK +# So far, only Onnxruntime + XNNPack only allow one target processor. +# And we support Mac universal package, so, +# CMAKE_OSX_ARCHITECTURES_COUNT greater than 1 is allowed in other places. +IF(CMAKE_OSX_ARCHITECTURES) + LIST(LENGTH CMAKE_OSX_ARCHITECTURES CMAKE_OSX_ARCHITECTURES_COUNT) + IF(CMAKE_OSX_ARCHITECTURES_COUNT GREATER 1) + MESSAGE(STATUS "Unsupported Onnxruntime with XNNPACK build with multiple OSX architectures (${CMAKE_OSX_ARCHITECTURES}). " + "Specify a single architecture in CMAKE_OSX_ARCHITECTURES and re-configure. ") + ENDIF() + IF(NOT CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64|arm64e|arm64_32)$") + MESSAGE(FATAL_ERROR "Unrecognized CMAKE_OSX_ARCHITECTURES value \"${CMAKE_OSX_ARCHITECTURES}\"") + ENDIF() + SET(ORT_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}") + ADD_COMPILE_OPTIONS("-Wno-shorten-64-to-32") +ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_GENERATOR_PLATFORM) + IF(CMAKE_GENERATOR_PLATFORM MATCHES "^Win32") + SET(ORT_TARGET_PROCESSOR "x86") + ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^x64") + SET(ORT_TARGET_PROCESSOR "x86_64") + ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^ARM64") + SET(ORT_TARGET_PROCESSOR "arm64") + ELSEIF(CMAKE_GENERATOR_PLATFORM MATCHES "^ARM64EC") + SET(ORT_TARGET_PROCESSOR "arm64") + ELSE() + MESSAGE(FATAL_ERROR "Unsupported Visual Studio architecture \"${CMAKE_GENERATOR_PLATFORM}\"") + ENDIF() +ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^i[3-7]86$") + SET(ORT_TARGET_PROCESSOR "x86") +ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64") + SET(ORT_TARGET_PROCESSOR "x86_64") +ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]") + SET(ORT_TARGET_PROCESSOR "arm") +ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + SET(ORT_TARGET_PROCESSOR "arm64") +ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le") + SET(ORT_TARGET_PROCESSOR "ppc64") +ELSEIF(NOT ORT_TARGET_PROCESSOR MATCHES "^(x86(_64)?|arm64|riscv(32|64|128)|Hexagon|ppc64)$") + SET(ORT_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}") +ELSE() + MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_PROCESSOR value \"${CMAKE_SYSTEM_PROCESSOR}\"") +ENDIF() +MESSAGE(STATUS "Building for ORT_TARGET_PROCESSOR: ${ORT_TARGET_PROCESSOR}") + # https://github.com/google/XNNPACK/blob/3b3f7b8a6668f6ab3b6ce33b9f1d1fce971549d1/CMakeLists.txt#L206C82-L206C117 if(ORT_TARGET_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai}) From 256a23632a267f778bb08170f3ebc20ad3315f75 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Sep 2024 23:15:57 +0800 Subject: [PATCH 23/25] update as comments --- onnxruntime/core/providers/xnnpack/math/softmax.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/xnnpack/math/softmax.cc b/onnxruntime/core/providers/xnnpack/math/softmax.cc index 0dc1e69140138..43e3ac193de5d 100644 --- a/onnxruntime/core/providers/xnnpack/math/softmax.cc +++ b/onnxruntime/core/providers/xnnpack/math/softmax.cc @@ -178,7 +178,7 @@ Softmax::Softmax(const OpKernelInfo& info) : XnnpackKernel{info} { &p); } - ORT_ENFORCE(xstatus == xnn_status_success && p != nullptr, "xnn_create_softmax_nc_", + ORT_ENFORCE(xstatus == xnn_status_success, "xnn_create_softmax_nc_", OpTypeToString(op_type_), " failed. Status:", xstatus); channel_dim_ = channels; op0_.reset(p); From 510b6d0efd0f304e78c43ccd1335a0ae1380f0fb Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 17 Sep 2024 19:02:04 +0800 Subject: [PATCH 24/25] Update cmake/external/xnnpack.cmake Co-authored-by: Scott McKay --- cmake/external/xnnpack.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index a652a6604653b..eb865a18e182b 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -40,8 +40,8 @@ onnxruntime_fetchcontent_makeavailable(pthreadpool) IF(CMAKE_OSX_ARCHITECTURES) LIST(LENGTH CMAKE_OSX_ARCHITECTURES CMAKE_OSX_ARCHITECTURES_COUNT) IF(CMAKE_OSX_ARCHITECTURES_COUNT GREATER 1) - MESSAGE(STATUS "Unsupported Onnxruntime with XNNPACK build with multiple OSX architectures (${CMAKE_OSX_ARCHITECTURES}). " - "Specify a single architecture in CMAKE_OSX_ARCHITECTURES and re-configure. ") + MESSAGE(STATUS "Building ONNX Runtime with XNNPACK and multiple OSX architectures is not supported. Got:(${CMAKE_OSX_ARCHITECTURES}). " + "Please specify a single architecture in CMAKE_OSX_ARCHITECTURES and re-configure. ") ENDIF() IF(NOT CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64|arm64e|arm64_32)$") MESSAGE(FATAL_ERROR "Unrecognized CMAKE_OSX_ARCHITECTURES value \"${CMAKE_OSX_ARCHITECTURES}\"") From cf4cf080cb8e353859bc8d18fc83b867b1c4e4ab Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 17 Sep 2024 19:11:32 +0800 Subject: [PATCH 25/25] update comments --- cmake/external/xnnpack.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake index a652a6604653b..4bebb197a3bf6 100644 --- a/cmake/external/xnnpack.cmake +++ b/cmake/external/xnnpack.cmake @@ -77,6 +77,7 @@ ELSE() ENDIF() MESSAGE(STATUS "Building for ORT_TARGET_PROCESSOR: ${ORT_TARGET_PROCESSOR}") +# KleidiAI is only used in Arm64 platform and not supported by MSVC, the details can be seen in # https://github.com/google/XNNPACK/blob/3b3f7b8a6668f6ab3b6ce33b9f1d1fce971549d1/CMakeLists.txt#L206C82-L206C117 if(ORT_TARGET_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC") FetchContent_Declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai})