diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 054bec3eec42..b750d8963b5c 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -4,12 +4,65 @@ if(MSVC) add_definitions(/wd4996) endif() +# ncnn macro +include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_add_param.cmake) + +set(benchncnn_PARAMS + alexnet.param + blazeface.param + efficientnet_b0.param + efficientnetv2_b0.param + FastestDet.param + googlenet_int8.param + googlenet.param + mnasnet.param + mobilenet_int8.param + mobilenet_ssd_int8.param + mobilenet_ssd.param + mobilenet_v2.param + mobilenet_v3.param + mobilenet_yolo.param + mobilenet.param + mobilenetv2_yolov3.param + nanodet_m.param + proxylessnasnet.param + regnety_400m.param + resnet18_int8.param + resnet18.param + resnet50_int8.param + resnet50.param + shufflenet_v2.param + shufflenet.param + squeezenet_int8.param + squeezenet_ssd_int8.param + squeezenet_ssd.param + squeezenet.param + vgg16_int8.param + vgg16.param + vision_transformer.param + yolo-fastest-1.1.param + yolo-fastestv2.param + yolov4-tiny.param +) + +foreach(PARAM_FILE ${benchncnn_PARAMS}) + ncnn_add_param("${CMAKE_CURRENT_SOURCE_DIR}/${PARAM_FILE}") +endforeach() + +add_custom_target(ncnn-generate-param DEPENDS ${NCNN_PARAM_HEX_FILES}) + +configure_file(benchncnn_param_data.h.in ${CMAKE_CURRENT_BINARY_DIR}/benchncnn_param_data.h) + add_executable(benchncnn benchncnn.cpp) target_link_libraries(benchncnn PRIVATE ncnn) +target_include_directories(benchncnn PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") target_link_libraries(benchncnn PRIVATE nodefs.js) endif() +add_dependencies(benchncnn ncnn-generate-param) + # add benchncnn to a virtual project group set_property(TARGET benchncnn PROPERTY FOLDER "benchmark") diff --git a/benchmark/README.md b/benchmark/README.md index 42da17bab422..c95ff90388f9 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -4,7 +4,7 @@ Only the network definition files (ncnn 
param) are required. The large model binary files (ncnn bin) are not loaded but generated randomly for speed test. -If no model specified, it would benchmark default list. More model networks may be added later. +If no model specified, it would benchmark default built-in models. More model networks may be added later. --- Build @@ -31,11 +31,20 @@ run benchncnn on android device ```shell # for running on android device, upload to /data/local/tmp/ folder adb push benchncnn /data/local/tmp/ -adb push /benchmark/*.param /data/local/tmp/ -adb shell + +# (optional) upload your ncnn model param to /data/local/tmp/ folder +adb push model.param /data/local/tmp/ # executed in android adb shell +adb shell cd /data/local/tmp/ + +# sample: benchmark built-in models on cpu, with 4 threads on big core, 4 loops and cooling_down +./benchncnn 4 4 2 -1 1 + +# sample: benchmark built-in models on gpu id 0, with 1 thread on big core, 8 loops, without cooling_down +./benchncnn 8 1 2 0 0 + ./benchncnn [loop count] [num threads] [powersave] [gpu device] [cooling down] [(key=value)...] param=model.param shape=[227,227,3],.. 
diff --git a/benchmark/benchncnn.cpp b/benchmark/benchncnn.cpp
index 6891baf99d19..ac6b77af3ec4 100644
--- a/benchmark/benchncnn.cpp
+++ b/benchmark/benchncnn.cpp
@@ -15,6 +15,8 @@
 #include "net.h"
 #include "gpu.h"
 
+#include "benchncnn_param_data.h"
+
 #ifndef NCNN_SIMPLESTL
 #include <vector>
 #endif
@@ -46,12 +48,12 @@ static ncnn::VkAllocator* g_blob_vkallocator = 0;
 static ncnn::VkAllocator* g_staging_vkallocator = 0;
 #endif // NCNN_VULKAN
 
-void benchmark(const char* comment, const std::vector<ncnn::Mat>& _in, const ncnn::Option& opt, bool fixed_path = true)
+void benchmark(const char* comment, const std::vector<ncnn::Mat>& _in, const ncnn::Option& opt, const char* model_param_data = NULL)
 {
     // Skip if int8 model name and using GPU
     if (opt.use_vulkan_compute && strstr(comment, "int8") != NULL)
     {
-        if (!fixed_path)
+        if (!model_param_data)
             fprintf(stderr, "%20s skipped (int8+GPU not supported)\n", comment);
         return;
     }
@@ -78,17 +80,9 @@ void benchmark(const char* comment, const std::vector<ncnn::Mat>& _in, const ncn
     }
 #endif // NCNN_VULKAN
 
-#ifdef __EMSCRIPTEN__
-#define MODEL_DIR "/working/"
-#else
-#define MODEL_DIR ""
-#endif
-
-    if (fixed_path)
+    if (model_param_data)
     {
-        char parampath[256];
-        sprintf(parampath, MODEL_DIR "%s.param", comment);
-        net.load_param(parampath);
+        net.load_param_mem(model_param_data);
     }
     else
     {
@@ -173,11 +167,11 @@ void benchmark(const char* comment, const std::vector<ncnn::Mat>& _in, const ncn
     fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", comment, time_min, time_max, time_avg);
 }
 
-void benchmark(const char* comment, const ncnn::Mat& _in, const ncnn::Option& opt, bool fixed_path = true)
+void benchmark(const char* comment, const ncnn::Mat& _in, const ncnn::Option& opt, const char* model_param_data = NULL)
 {
     std::vector<ncnn::Mat> inputs;
     inputs.push_back(_in);
-    return benchmark(comment, inputs, opt, fixed_path);
+    return benchmark(comment, inputs, opt, model_param_data);
 }
 
 void show_usage()
@@ -387,82 +381,82 @@ int main(int argc, char** argv)
     if (model != 0)
     {
         // run user defined
benchmark - benchmark(model, inputs, opt, false); + benchmark(model, inputs, opt); } else { // run default cases - benchmark("squeezenet", ncnn::Mat(227, 227, 3), opt); + benchmark("squeezenet", ncnn::Mat(227, 227, 3), opt, squeezenet_param_data); - benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3), opt); + benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3), opt, squeezenet_int8_param_data); - benchmark("mobilenet", ncnn::Mat(224, 224, 3), opt); + benchmark("mobilenet", ncnn::Mat(224, 224, 3), opt, mobilenet_param_data); - benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3), opt); + benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3), opt, mobilenet_int8_param_data); - benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3), opt); + benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3), opt, mobilenet_v2_param_data); - // benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3), opt); + // benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3), opt, mobilenet_v2_int8_param_data); - benchmark("mobilenet_v3", ncnn::Mat(224, 224, 3), opt); + benchmark("mobilenet_v3", ncnn::Mat(224, 224, 3), opt, mobilenet_v3_param_data); - benchmark("shufflenet", ncnn::Mat(224, 224, 3), opt); + benchmark("shufflenet", ncnn::Mat(224, 224, 3), opt, shufflenet_param_data); - benchmark("shufflenet_v2", ncnn::Mat(224, 224, 3), opt); + benchmark("shufflenet_v2", ncnn::Mat(224, 224, 3), opt, shufflenet_v2_param_data); - benchmark("mnasnet", ncnn::Mat(224, 224, 3), opt); + benchmark("mnasnet", ncnn::Mat(224, 224, 3), opt, mnasnet_param_data); - benchmark("proxylessnasnet", ncnn::Mat(224, 224, 3), opt); + benchmark("proxylessnasnet", ncnn::Mat(224, 224, 3), opt, proxylessnasnet_param_data); - benchmark("efficientnet_b0", ncnn::Mat(224, 224, 3), opt); + benchmark("efficientnet_b0", ncnn::Mat(224, 224, 3), opt, efficientnet_b0_param_data); - benchmark("efficientnetv2_b0", ncnn::Mat(224, 224, 3), opt); + benchmark("efficientnetv2_b0", ncnn::Mat(224, 224, 3), opt, efficientnetv2_b0_param_data); - 
benchmark("regnety_400m", ncnn::Mat(224, 224, 3), opt); + benchmark("regnety_400m", ncnn::Mat(224, 224, 3), opt, regnety_400m_param_data); - benchmark("blazeface", ncnn::Mat(128, 128, 3), opt); + benchmark("blazeface", ncnn::Mat(128, 128, 3), opt, blazeface_param_data); - benchmark("googlenet", ncnn::Mat(224, 224, 3), opt); + benchmark("googlenet", ncnn::Mat(224, 224, 3), opt, googlenet_param_data); - benchmark("googlenet_int8", ncnn::Mat(224, 224, 3), opt); + benchmark("googlenet_int8", ncnn::Mat(224, 224, 3), opt, googlenet_int8_param_data); - benchmark("resnet18", ncnn::Mat(224, 224, 3), opt); + benchmark("resnet18", ncnn::Mat(224, 224, 3), opt, resnet18_param_data); - benchmark("resnet18_int8", ncnn::Mat(224, 224, 3), opt); + benchmark("resnet18_int8", ncnn::Mat(224, 224, 3), opt, resnet18_int8_param_data); - benchmark("alexnet", ncnn::Mat(227, 227, 3), opt); + benchmark("alexnet", ncnn::Mat(227, 227, 3), opt, alexnet_param_data); - benchmark("vgg16", ncnn::Mat(224, 224, 3), opt); + benchmark("vgg16", ncnn::Mat(224, 224, 3), opt, vgg16_param_data); - benchmark("vgg16_int8", ncnn::Mat(224, 224, 3), opt); + benchmark("vgg16_int8", ncnn::Mat(224, 224, 3), opt, vgg16_int8_param_data); - benchmark("resnet50", ncnn::Mat(224, 224, 3), opt); + benchmark("resnet50", ncnn::Mat(224, 224, 3), opt, resnet50_param_data); - benchmark("resnet50_int8", ncnn::Mat(224, 224, 3), opt); + benchmark("resnet50_int8", ncnn::Mat(224, 224, 3), opt, resnet50_int8_param_data); - benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3), opt); + benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3), opt, squeezenet_ssd_param_data); - benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3), opt); + benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3), opt, squeezenet_ssd_int8_param_data); - benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3), opt); + benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3), opt, mobilenet_ssd_param_data); - benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3), opt); + 
benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3), opt, mobilenet_ssd_int8_param_data); - benchmark("mobilenet_yolo", ncnn::Mat(416, 416, 3), opt); + benchmark("mobilenet_yolo", ncnn::Mat(416, 416, 3), opt, mobilenet_yolo_param_data); - benchmark("mobilenetv2_yolov3", ncnn::Mat(352, 352, 3), opt); + benchmark("mobilenetv2_yolov3", ncnn::Mat(352, 352, 3), opt, mobilenetv2_yolov3_param_data); - benchmark("yolov4-tiny", ncnn::Mat(416, 416, 3), opt); + benchmark("yolov4-tiny", ncnn::Mat(416, 416, 3), opt, yolov4_tiny_param_data); - benchmark("nanodet_m", ncnn::Mat(320, 320, 3), opt); + benchmark("nanodet_m", ncnn::Mat(320, 320, 3), opt, nanodet_m_param_data); - benchmark("yolo-fastest-1.1", ncnn::Mat(320, 320, 3), opt); + benchmark("yolo-fastest-1.1", ncnn::Mat(320, 320, 3), opt, yolo_fastest_1_1_param_data); - benchmark("yolo-fastestv2", ncnn::Mat(352, 352, 3), opt); + benchmark("yolo-fastestv2", ncnn::Mat(352, 352, 3), opt, yolo_fastestv2_param_data); - benchmark("vision_transformer", ncnn::Mat(384, 384, 3), opt); + benchmark("vision_transformer", ncnn::Mat(384, 384, 3), opt, vision_transformer_param_data); - benchmark("FastestDet", ncnn::Mat(352, 352, 3), opt); + benchmark("FastestDet", ncnn::Mat(352, 352, 3), opt, FastestDet_param_data); } #if NCNN_VULKAN delete g_blob_vkallocator; diff --git a/benchmark/benchncnn_param_data.h.in b/benchmark/benchncnn_param_data.h.in new file mode 100644 index 000000000000..b04970ea9266 --- /dev/null +++ b/benchmark/benchncnn_param_data.h.in @@ -0,0 +1,5 @@ +// Benchncnn Param Data header +// +// This file is auto-generated by cmake, don't edit it. 
+
+@param_header_data@
diff --git a/cmake/ncnn_add_param.cmake b/cmake/ncnn_add_param.cmake
new file mode 100644
index 000000000000..3e5ae5088e94
--- /dev/null
+++ b/cmake/ncnn_add_param.cmake
@@ -0,0 +1,35 @@
+
+macro(ncnn_add_param NCNN_PARAM_SRC)
+    # Get the file name with extension
+    get_filename_component(NCNN_PARAM_SRC_NAME_WE ${NCNN_PARAM_SRC} NAME)
+    # Manually remove ".param" since NAME_WE treats ".1.param" as a multi-extension
+    string(REPLACE ".param" "" NCNN_PARAM_SRC_NAME_WE "${NCNN_PARAM_SRC_NAME_WE}")
+    # Replace characters invalid in C identifiers ('.' and '-') with underscores
+    string(REPLACE "." "_" NCNN_PARAM_SRC_NAME_WE "${NCNN_PARAM_SRC_NAME_WE}")
+    string(REPLACE "-" "_" NCNN_PARAM_SRC_NAME_WE "${NCNN_PARAM_SRC_NAME_WE}")
+    # Check if the result is empty
+    if (NOT NCNN_PARAM_SRC_NAME_WE)
+        message(FATAL_ERROR "Failed to extract valid filename from '${NCNN_PARAM_SRC}'")
+    endif()
+    # Check if the extracted filename is a valid C identifier
+    string(REGEX MATCH "^[A-Za-z_][A-Za-z0-9_]*$" is_valid "${NCNN_PARAM_SRC_NAME_WE}")
+    if (NOT is_valid)
+        message(FATAL_ERROR "Extracted filename '${NCNN_PARAM_SRC_NAME_WE}' is not a valid C identifier")
+    endif()
+
+    set(NCNN_PARAM_HEADER ${CMAKE_CURRENT_BINARY_DIR}/param/${NCNN_PARAM_SRC_NAME_WE}.hex.h)
+
+    add_custom_command(
+        OUTPUT ${NCNN_PARAM_HEADER}
+        COMMAND ${CMAKE_COMMAND} -DPARAM_SRC=${NCNN_PARAM_SRC} -DPARAM_SRC_NAME_WE=${NCNN_PARAM_SRC_NAME_WE} -DPARAM_HEADER=${NCNN_PARAM_HEADER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_generate_param_header.cmake"
+        DEPENDS ${NCNN_PARAM_SRC}
+        COMMENT "Preprocessing param source ${NCNN_PARAM_SRC_NAME_WE}.param"
+        VERBATIM
+    )
+    set_source_files_properties(${NCNN_PARAM_HEADER} PROPERTIES GENERATED TRUE)
+
+    get_filename_component(NCNN_PARAM_HEADER_NAME ${NCNN_PARAM_HEADER} NAME)
+    string(APPEND
param_header_data "#include \"param/${NCNN_PARAM_HEADER_NAME}\"\n") + + list(APPEND NCNN_PARAM_HEX_FILES ${NCNN_PARAM_HEADER}) +endmacro() diff --git a/cmake/ncnn_generate_param_header.cmake b/cmake/ncnn_generate_param_header.cmake new file mode 100644 index 000000000000..675b067ec554 --- /dev/null +++ b/cmake/ncnn_generate_param_header.cmake @@ -0,0 +1,23 @@ + +# must define PARAM_HEADER PARAM_SRC PARAM_SRC_NAME_WE + +file(READ ${PARAM_SRC} param_data) + +# remove whitespace +string(REGEX REPLACE "\n +" "\n" param_data ${param_data}) + +# replace more spaces to one space +string(REGEX REPLACE "[ \t]+" " " param_data "${param_data}") + +# remove empty line +string(REGEX REPLACE "\n[\n]+" "\n" param_data "${param_data}") + +# text to hex +file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/param/${PARAM_SRC_NAME_WE}.text2hex.txt "${param_data}") +file(READ ${CMAKE_CURRENT_BINARY_DIR}/param/${PARAM_SRC_NAME_WE}.text2hex.txt param_data_hex HEX) +string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," param_data_hex ${param_data_hex}) +string(FIND "${param_data_hex}" "," tail_comma REVERSE) +string(SUBSTRING "${param_data_hex}" 0 ${tail_comma} param_data_hex) + +# generate model param header file +file(WRITE ${PARAM_HEADER} "static const char ${PARAM_SRC_NAME_WE}_param_data[] = {${param_data_hex},0x00};\n")