diff --git a/packaging/build_3rdparty_static_debian.sh b/packaging/build_3rdparty_static_debian.sh
index 7fed688fc..d2d61d4b5 100755
--- a/packaging/build_3rdparty_static_debian.sh
+++ b/packaging/build_3rdparty_static_debian.sh
@@ -11,6 +11,7 @@ cd $BASEDIR/debian_3rdparty
 ./build_taglib.sh
 ./build_yaml.sh
 ./build_chromaprint.sh
+./build_onnx.sh
 
 if [[ "$*" == *--with-gaia* ]]
 then
diff --git a/packaging/build_config.sh b/packaging/build_config.sh
index 5ce5fffc3..4c20e2184 100755
--- a/packaging/build_config.sh
+++ b/packaging/build_config.sh
@@ -2,7 +2,7 @@
 HOST=i686-w64-mingw32
 
 if [ -z "${PREFIX}" ]; then
-    PREFIX=`pwd`
+    PREFIX=$(pwd)
 fi
 
 echo Installing to: $PREFIX
@@ -28,7 +28,7 @@ CHROMAPRINT_VERSION=1.4.3
 QT_SOURCE_URL=https://download.qt.io/archive/qt/4.8/4.8.4/qt-everywhere-opensource-src-4.8.4.tar.gz
 GAIA_VERSION=2.4.6-86-ged433ed
 TENSORFLOW_VERSION=2.5.0
-
+LIBONNXRUNTIME_VERSION=1.22.1
 
 FFMPEG_AUDIO_FLAGS="
 --disable-programs
diff --git a/packaging/debian_3rdparty/build_onnx.sh b/packaging/debian_3rdparty/build_onnx.sh
new file mode 100644
index 000000000..766eb1d0c
--- /dev/null
+++ b/packaging/debian_3rdparty/build_onnx.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+set -e
+. ../build_config.sh
+
+rm -rf tmp
+mkdir tmp
+cd tmp
+
+# Prerequisites: python>=3.10
+#                cmake>=3.28
+
+echo "Building onnxruntime $LIBONNXRUNTIME_VERSION"
+
+curl -SLO "https://github.com/microsoft/onnxruntime/archive/refs/tags/v$LIBONNXRUNTIME_VERSION.tar.gz"
+
+tar -xf v$LIBONNXRUNTIME_VERSION.tar.gz
+cd onnxruntime-$LIBONNXRUNTIME_VERSION
+
+python3 -m pip install cmake
+
+# Build the shared library
+./build.sh \
+    --config Release \
+    --build_shared_lib \
+    --parallel \
+    --compile_no_warning_as_error \
+    --skip_submodule_sync \
+    --allow_running_as_root \
+    --skip_tests \
+    --cmake_extra_defines FETCHCONTENT_TRY_FIND_PACKAGE_MODE=NEVER CMAKE_INSTALL_PREFIX=${PREFIX}
+
+# Copy the onnxruntime library, pkg-config file, and public headers into the prefix
+mkdir -p "${PREFIX}"/lib/pkgconfig/
+mkdir -p "${PREFIX}"/include/onnxruntime/
+
+cp build/Linux/Release/libonnxruntime.pc ${PREFIX}/lib/pkgconfig/
+cp -r build/Linux/Release/libonnxruntime.so* ${PREFIX}/lib/
+
+cp include/onnxruntime/core/session/onnxruntime_cxx_inline.h ${PREFIX}/include/onnxruntime/
+cp include/onnxruntime/core/session/onnxruntime_float16.h ${PREFIX}/include/onnxruntime/
+cp include/onnxruntime/core/session/onnxruntime_c_api.h ${PREFIX}/include/onnxruntime/
+cp include/onnxruntime/core/session/onnxruntime_cxx_api.h ${PREFIX}/include/onnxruntime/
+
+cd ../..
+rm -fr tmp
diff --git a/src/algorithms/machinelearning/onnxpredict.cpp b/src/algorithms/machinelearning/onnxpredict.cpp
new file mode 100644
index 000000000..8ca5ccbd8
--- /dev/null
+++ b/src/algorithms/machinelearning/onnxpredict.cpp
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2006-2021 Music Technology Group - Universitat Pompeu Fabra
+ *
+ * This file is part of Essentia
+ *
+ * Essentia is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Affero General Public License as published by the Free
+ * Software Foundation (FSF), either version 3 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the Affero GNU General Public License
+ * version 3 along with this program. If not, see http://www.gnu.org/licenses/
+ */
+
+#include "onnxpredict.h"
+
+using namespace std;
+using namespace essentia;
+using namespace standard;
+
+const char* OnnxPredict::name = "OnnxPredict";
+const char* OnnxPredict::category = "Machine Learning";
+
+const char* OnnxPredict::description = DOC("This algorithm runs an ONNX model and stores the desired output tensors in a pool.\n"
+"The ONNX model should be saved in the Open Neural Network Exchange (.onnx) binary format [1], and should contain both the architecture and the weights of the model.\n"
+"The parameter `inputs` should contain a list with the names of the input nodes that feed the model. The input Pool should contain the tensors corresponding to each input node stored as Essentia tensors. "
+"The pool namespace for each input tensor has to match the input node's name.\n"
+"In the same way, the `outputs` parameter should contain the names of the tensors to save. These tensors will be stored inside the output pool under a namespace that matches the tensor's name. "
+"To print a list with all the available nodes in the graph, set the first element of `outputs` to an empty string (i.e., \"\").\n"
+"\n"
+"This algorithm is a wrapper for the ONNX Runtime inference API [2]. The first time it is configured with a non-empty `graphFilename` it will try to load the contained graph and to attach an ONNX Runtime session to it. "
+"The reset method deletes the model inputs and outputs that are stored internally. "
+"Reconfiguring the algorithm reloads the graph and calls the reset method.\n"
+"\n"
+"References:\n"
+"  [1] ONNX - The open standard for machine learning interoperability.\n"
+"  https://onnx.ai/onnx/intro/\n\n"
+"  [2] ONNX Runtime API - a cross-platform machine-learning model accelerator, with a flexible interface to integrate hardware-specific libraries.\n"
+"  https://onnxruntime.ai/docs/");
+
+
+void OnnxPredict::configure() {
+  _graphFilename = parameter("graphFilename").toString();
+  _deviceId = parameter("deviceId").toInt();
+  std::string opt = parameter("optimizationLevel").toString();
+
+  if (opt == "disable_all") {
+    _optimizationLevel = OnnxOptimizationLevel::DISABLE_ALL;
+  }
+  else if (opt == "basic") {
+    _optimizationLevel = OnnxOptimizationLevel::BASIC;
+  }
+  else if (opt == "extended") {
+    _optimizationLevel = OnnxOptimizationLevel::EXTENDED;
+  }
+  else if (opt == "all") {
+    _optimizationLevel = OnnxOptimizationLevel::ALL;
+  }
+  else {
+    throw EssentiaException(
+      "OnnxPredict: invalid optimizationLevel: " + opt + ". Choices: {disable_all,basic,extended,all}"
+    );
+  }
+
+  if (_graphFilename.empty() && _isConfigured) {
+    E_WARNING("OnnxPredict: You are trying to update a valid configuration with invalid parameters. "
+              "If you want to update the configuration specify a valid `graphFilename` parameter.");
+  }
+
+  // Do nothing if the model name is empty.
+  if (_graphFilename.empty()) return;
+
+  reset();
+
+  // Get input and output info (names, types and shapes).
+  all_input_infos = setTensorInfos(*_session, _allocator, "inputs");
+  all_output_infos = setTensorInfos(*_session, _allocator, "outputs");
+
+  // Read the requested inputs and outputs from the parameters.
+  _inputs = parameter("inputs").toVectorString();
+  _outputs = parameter("outputs").toVectorString();
+
+  _squeeze = parameter("squeeze").toBool();
+
+  _nInputs = _inputs.size();
+  _nOutputs = _outputs.size();
+
+  // Throw if no inputs are defined.
+  if (_nInputs == 0) {
+    throw EssentiaException("No model input was defined.\n" + availableInputInfo());
+  }
+
+  // Throw if no outputs are defined.
+  if (_nOutputs == 0) {
+    throw EssentiaException("No model output was defined.\n" + availableOutputInfo());
+  }
+
+  // If the first output name is empty just print out the list of nodes and return.
+  if (_outputs[0] == "") {
+    E_INFO(getTensorInfos(all_input_infos, "Model Inputs"));
+    E_INFO(getTensorInfos(all_output_infos, "Model Outputs"));
+    return;
+  }
+
+  _isConfigured = true;
+
+  // Check that the model exposes the requested inputs and outputs, see
+  // https://github.com/microsoft/onnxruntime-inference-examples/blob/7a635daae48450ff142e5c0848a564b245f04112/c_cxx/model-explorer/model-explorer.cpp#L99C3-L100C63
+  for (size_t i = 0; i < _inputs.size(); i++) {
+    for (size_t j = 0; j < all_input_infos.size(); j++) {
+      if (_inputs[i] == all_input_infos[j].name) {
+        _inputNodes.push_back(all_input_infos[j]);
+      }
+    }
+  }
+
+  // Throw if none of the requested inputs was found in the model.
+  if (!_inputNodes.size())
+    throw EssentiaException("No input node was found.\n" + availableInputInfo());
+
+  for (size_t i = 0; i < _outputs.size(); i++) {
+    for (size_t j = 0; j < all_output_infos.size(); j++) {
+      if (_outputs[i] == all_output_infos[j].name) {
+        _outputNodes.push_back(all_output_infos[j]);
+      }
+    }
+  }
+
+  // Throw if none of the requested outputs was found in the model.
+  if (!_outputNodes.size())
+    throw EssentiaException("No output node was found.\n" + availableOutputInfo());
+
+  for (size_t i = 0; i < _nInputs; i++) {
+    checkName(_inputs[i], all_input_infos);
+  }
+
+  for (size_t i = 0; i < _nOutputs; i++) {
+    checkName(_outputs[i], all_output_infos);
+  }
+}
+
+std::vector<TensorInfo> OnnxPredict::setTensorInfos(const Ort::Session& session, Ort::AllocatorWithDefaultOptions& allocator, const std::string& port) {
+
+  std::vector<TensorInfo> infos;
+
+  size_t count = (port == "inputs") ? session.GetInputCount() : session.GetOutputCount();
+  auto names_raw = (port == "inputs") ? session.GetInputNames() : session.GetOutputNames();
+
+  for (size_t i = 0; i < count; ++i) {
+    auto name_raw = names_raw[i];
+
+    std::string name(name_raw);
+    Ort::TypeInfo type_info = (port == "inputs") ?
+      session.GetInputTypeInfo(i) : session.GetOutputTypeInfo(i);
+
+    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
+
+    TensorInfo info;
+    info.name = name;
+    info.type = tensor_info.GetElementType();
+    info.shape = tensor_info.GetShape();
+
+    infos.push_back(std::move(info));
+  }
+
+  return infos;
+}
+
+void OnnxPredict::printTensorInfos(const std::vector<TensorInfo>& infos, const std::string& label) {
+  E_INFO("=== " << label << " ===\n");
+  for (const auto& info : infos) {
+    E_INFO("[Name] " << info.name);
+    E_INFO("  [Type] " << info.type);
+    E_INFO("  [Shape] [");
+    for (size_t j = 0; j < info.shape.size(); ++j) {
+      E_INFO(info.shape[j]);
+      if (j + 1 < info.shape.size()) E_INFO(", ");
+    }
+    E_INFO("]\n");
+  }
+}
+
+std::string OnnxPredict::getTensorInfos(const std::vector<TensorInfo>& infos, const std::string& label) {
+  std::string out;
+  out += "=== " + label + " ===\n";
+  for (const auto& info : infos) {
+    out += "[Name] " + info.name + "\n";
+    std::string type_str = onnxTypeToString(info.type);
+    out += "\t[Type] " + type_str + "\n";
+    out += "\t[Shape] [";
+    for (size_t j = 0; j < info.shape.size(); ++j) {
+      out += std::to_string(info.shape[j]);
+      if (j + 1 < info.shape.size()) out += ", ";
+    }
+    out += "]\n";
+  }
+  return out;
+}
+
+void OnnxPredict::reset() {
+
+  input_names.clear();
+  output_names.clear();
+  _inputNodes.clear();
+  _outputNodes.clear();
+
+  try {
+
+    // Reset the session.
+    _session.reset();
+
+    // Reset SessionOptions by constructing a fresh object.
+    _sessionOptions = Ort::SessionOptions{};
+
+    // Auto-detect execution providers.
+    #ifdef USE_CUDA
+    if (std::find(providers.begin(), providers.end(), "CUDAExecutionProvider") != providers.end()) {
+      OrtSessionOptionsAppendExecutionProvider_CUDA(_sessionOptions, _deviceId);
+      E_INFO("✅ Using CUDA Execution Provider (GPU " << _deviceId << ")");
+    }
+    #endif
+
+    #ifdef USE_METAL
+    if (std::find(providers.begin(), providers.end(), "MetalExecutionProvider") != providers.end()) {
+      OrtSessionOptionsAppendExecutionProvider_Metal(_sessionOptions, _deviceId);
+      E_INFO("✅ Using Metal Execution Provider (GPU " << _deviceId << ")");
+    }
+    #endif
+
+    #ifdef USE_COREML
+    if (std::find(providers.begin(), providers.end(), "CoreMLExecutionProvider") != providers.end()) {
+      OrtSessionOptionsAppendExecutionProvider_CoreML(_sessionOptions, _deviceId);
+      E_INFO("✅ Using Core ML Execution Provider (GPU " << _deviceId << ")");
+    }
+    #endif
+
+    // Set the graph optimization level, mapping our enum to ONNX Runtime's. See
+    // https://onnxruntime.ai/docs/performance/model-optimizations/graph-optimizations.html
+    switch (_optimizationLevel) {
+      case OnnxOptimizationLevel::DISABLE_ALL:
+        _sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
+        break;
+      case OnnxOptimizationLevel::BASIC:
+        _sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
+        break;
+      case OnnxOptimizationLevel::EXTENDED:
+        _sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
+        break;
+      case OnnxOptimizationLevel::ALL:
+        _sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
+        break;
+    }
+    _sessionOptions.SetIntraOpNumThreads(0);
+
+    // Initialize the session.
+    _session = std::make_unique<Ort::Session>(_env, _graphFilename.c_str(), _sessionOptions);
+
+  }
+  catch (const Ort::Exception& e) {
+    // Fall back only if the optimization level is above BASIC.
+    if (_optimizationLevel != OnnxOptimizationLevel::BASIC &&
+        _optimizationLevel != OnnxOptimizationLevel::DISABLE_ALL) {
+      E_WARNING(
+        "OnnxPredict: graph optimization level failed (" +
+        std::string(e.what()) +
+        "), retrying with BASIC optimization"
+      );
+      // Fall back to BASIC.
+      _sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
+      _session = std::make_unique<Ort::Session>(_env, _graphFilename.c_str(), _sessionOptions);
+    }
+    else {
+      // No fallback possible.
+      throw EssentiaException("OnnxPredict: session creation failed: " + std::string(e.what()) +
+                              " (ORT error code " + std::to_string(static_cast<int>(e.GetOrtErrorCode())) + ")");
+    }
+  }
+
+  E_INFO("OnnxPredict: Successfully loaded graph file: `" << _graphFilename << "`");
+}
"), retrying with BASIC optimization" + ); + // Fallback to BASIC + _sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC); + _session = std::make_unique(_env, _graphFilename.c_str(), _sessionOptions); + } + else + // No fallback possible + throw EssentiaException("OnnxPredict: session creation failed: " + std::string(e.what()), e.GetOrtErrorCode()); + } + + E_INFO("OnnxPredict: Successfully loaded graph file: `" << _graphFilename << "`"); +} + +void OnnxPredict::compute() { + + if (!_isConfigured) { + throw EssentiaException("OnnxPredict: This algorithm is not configured. To configure this algorithm you " + "should specify a valid `graphFilename`, `inputs` and `outputs` as input parameter."); + } + + const Pool& poolIn = _poolIn.get(); + Pool& poolOut = _poolOut.get(); + + std::vector> inputDataVector; // <-- keeps inputs alive + std:vector> shapes; // <-- keeps shapes alive + + if (!input_tensors.empty()) + input_tensors.clear(); // <-- destroy input tensors + + + // Parse the input tensors from the pool into ONNX Runtime tensors. + for (size_t i = 0; i < _nInputs; i++) { + + const Tensor& inputData = poolIn.value >(_inputs[i]); + + // Step 1: Get tensor shape + std::vector shape; + int dims = 1; + + shape.push_back((int64_t)inputData.dimension(0)); + + if (_squeeze) { + + for(int j = 1; j < inputData.rank(); j++) { + if (inputData.dimension(j) > 1) { + shape.push_back((int64_t)inputData.dimension(j)); + dims++; + } + } + + // There should be at least 2 dimensions (batch, data) + if (dims == 1) { + shape.push_back((int64_t) 1); + dims++; + } + + } else { + dims = inputData.rank(); + for(int j = 1; j < dims; j++) { + shape.push_back((int64_t)inputData.dimension(j)); + } + } + + // Step 2: keep Real (float32) as-is --- + inputDataVector.emplace_back(inputData.size()); + // Essentia::Real is already float32 by default, so no need to cast. + // We copy directly into the input vector that will be fed to ONNX tensor. + std::copy(inputData.data(), inputData.data() + inputData.size(), inputDataVector.back().begin()); + + // Step 3: Create ONNX Runtime tensor + #ifdef USE_CUDA + if (_sessionOptions.GetExecutionProviderCount() > 0 && + std::string(_sessionOptions.GetExecutionProviderName(0)) == "CUDAExecutionProvider") { + _memoryInfo = Ort::MemoryInfo::CreateCuda(_deviceId, OrtMemTypeDefault); + } else + #endif + { + _memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + } + + if (_memoryInfo == nullptr) { + throw EssentiaException("OnnxPredict: Error allocating memory for input tensor."); + } + + if (_memoryInfo == NULL) { + throw EssentiaException("OnnxRuntimePredict: Error allocating memory for input tensor."); + } + + input_tensors.emplace_back(Ort::Value::CreateTensor(_memoryInfo, inputDataVector.back().data(), inputDataVector.back().size(), shape.data(), shape.size())); + shapes.push_back(shape); + } + + // Define input and output names + for (const auto& tensorInfo : _inputNodes) { + input_names.push_back(tensorInfo.name.c_str()); + } + + for (const auto& tensorInfo : _outputNodes) { + output_names.push_back(tensorInfo.name.c_str()); + } + + // Run the Onnxruntime session. + auto output_tensors = _session->Run(_runOptions, // Run options. + input_names.data(), // Input node names. + input_tensors.data(), // Input tensor values. + _nInputs, // Number of inputs. + output_names.data(), // Output node names. + _nOutputs // Number of outputs. 
+
+  // Map the output tensors to the pool.
+  for (size_t i = 0; i < output_tensors.size(); ++i) {
+
+    const Real* outputData = output_tensors[i].GetTensorData<Real>();
+
+    // Create an array to store the output tensor shape.
+    array<Eigen::Index, 4> _shape {1, 1, 1, 1};
+    _shape[0] = (int)shapes[0][0];
+
+    for (size_t j = 1; j < _outputNodes[i].shape.size(); j++) {
+      int shape_idx = _shape.size() - j;
+      _shape[shape_idx] = (int)_outputNodes[i].shape[_outputNodes[i].shape.size() - j];
+    }
+
+    // Store the tensor in the pool.
+    const Tensor<Real> tensorMap = TensorMap<const Real>(outputData, _shape);
+    poolOut.set(_outputs[i], tensorMap);
+  }
+
+}
+
+
+void OnnxPredict::checkName(const string nodeName, std::vector<TensorInfo> _infos) {
+
+  vector<string> _names;
+
+  for (size_t i = 0; i < _infos.size(); i++) {
+    _names.push_back(_infos[i].name);
+  }
+
+  std::unordered_set<string> lookup(_names.begin(), _names.end());
+  if (lookup.find(nodeName) == lookup.end())
+    throw EssentiaException("OnnxPredict: `" + nodeName + "` is not a valid node name. Make sure that all "
+                            "your inputs and outputs are defined in the node list.");
+}
+
+
+vector<string> OnnxPredict::inputNames() {
+
+  vector<string> inputNames;
+
+  // inputs
+  for (size_t i = 0; i < all_input_infos.size(); i++) {
+    inputNames.push_back(all_input_infos[i].name);
+  }
+
+  return inputNames;
+}
+
+vector<string> OnnxPredict::outputNames() {
+
+  vector<string> outputNames;
+
+  // outputs
+  for (size_t i = 0; i < all_output_infos.size(); i++) {
+    outputNames.push_back(all_output_infos[i].name);
+  }
+
+  return outputNames;
+}
+
+std::string OnnxPredict::onnxTypeToString(ONNXTensorElementDataType type) {
+  switch (type) {
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: return "float32";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: return "uint8";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: return "int8";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: return "uint16";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: return "int16";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: return "int32";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: return "int64";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: return "string";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: return "bool";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: return "float16";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: return "float64";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: return "uint32";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: return "uint64";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: return "complex64";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: return "complex128";
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: return "bfloat16";
+    default: return "unknown";
+  }
+}
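A note on the node-listing behaviour documented in the DOC string above: configuring the algorithm with an empty string as the first output makes it log the model's inputs and outputs and return without building a usable configuration. A minimal, hypothetical sketch of that inspection step from Python (the model path is the one used by the unit tests later in this patch; any non-empty `inputs` list is accepted at this stage):

    # Hypothetical sketch; assumes an Essentia build configured with --with-onnx.
    import essentia
    from essentia.standard import OnnxPredict

    essentia.log.infoActive = True  # the node list is emitted through E_INFO
    OnnxPredict(graphFilename="test/models/effnetdiscogs/effnetdiscogs-bsdynamic-1.onnx",
                inputs=["melspectrogram"],
                outputs=[""])  # empty first output name -> print inputs/outputs and return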
diff --git a/src/algorithms/machinelearning/onnxpredict.h b/src/algorithms/machinelearning/onnxpredict.h
new file mode 100644
index 000000000..e61fa14bc
--- /dev/null
+++ b/src/algorithms/machinelearning/onnxpredict.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2006-2021 Music Technology Group - Universitat Pompeu Fabra
+ *
+ * This file is part of Essentia
+ *
+ * Essentia is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Affero General Public License as published by the Free
+ * Software Foundation (FSF), either version 3 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the Affero GNU General Public License
+ * version 3 along with this program. If not, see http://www.gnu.org/licenses/
+ */
+
+#ifndef ESSENTIA_ONNXPREDICT_H
+#define ESSENTIA_ONNXPREDICT_H
+
+#include "algorithm.h"
+#include "pool.h"
+
+#define ORT_ENABLE_EXTENDED_API
+#include <onnxruntime_cxx_api.h>
+
+#include <memory>
+#include <unordered_set>
+
+enum class OnnxOptimizationLevel {
+  DISABLE_ALL,
+  BASIC,
+  EXTENDED,
+  ALL
+};
+
+std::ostream& operator<<(std::ostream&, OnnxOptimizationLevel);
+std::istream& operator>>(std::istream&, OnnxOptimizationLevel&);
+
+namespace essentia {
+namespace standard {
+
+struct TensorInfo {
+  std::string name;
+  ONNXTensorElementDataType type;
+  std::vector<int64_t> shape;
+};
+
+class OnnxPredict : public Algorithm {
+
+ protected:
+
+  Input<Pool> _poolIn;
+  Output<Pool> _poolOut;
+
+  std::string _graphFilename;
+  std::vector<std::string> _inputs;
+  std::vector<std::string> _outputs;
+  size_t _deviceId;
+  OnnxOptimizationLevel _optimizationLevel;
+
+  bool _squeeze;
+  bool _isConfigured;
+
+  size_t _nInputs;
+  size_t _nOutputs;
+
+  Ort::Value _inputTensor{nullptr};
+  Ort::Value _outputTensor{nullptr};
+
+  std::vector<Ort::Value> input_tensors;
+  std::vector<const char*> input_names;
+  std::vector<const char*> output_names;
+
+  Ort::Env _env{nullptr};
+  Ort::SessionOptions _sessionOptions{nullptr};
+  std::unique_ptr<Ort::Session> _session;
+
+  Ort::RunOptions _runOptions;
+  Ort::AllocatorWithDefaultOptions _allocator;
+  Ort::MemoryInfo _memoryInfo{nullptr};  // Used to allocate memory for the inputs.
+
+  std::vector<TensorInfo> all_input_infos;
+  std::vector<TensorInfo> all_output_infos;
+
+  std::vector<TensorInfo> _inputNodes;
+  std::vector<TensorInfo> _outputNodes;
+
+  std::vector<std::string> inputNames();
+  std::vector<std::string> outputNames();
+  std::vector<TensorInfo> setTensorInfos(const Ort::Session&, Ort::AllocatorWithDefaultOptions&, const std::string&);
+  void printTensorInfos(const std::vector<TensorInfo>&, const std::string&);
+  std::string getTensorInfos(const std::vector<TensorInfo>&, const std::string&);
+  void checkName(const std::string, std::vector<TensorInfo>);
+  std::string onnxTypeToString(ONNXTensorElementDataType);
+
+  inline std::string availableInputInfo() {
+    std::vector<std::string> inputs = inputNames();
+    std::string info = "Available input names are:\n";
+    for (std::vector<std::string>::const_iterator i = inputs.begin(); i != inputs.end() - 1; ++i) info += *i + ", ";
+    return info + inputs.back() + "\n\nReconfigure this algorithm with valid input names before starting the processing.";
+  }
+
+  inline std::string availableOutputInfo() {
+    std::vector<std::string> outputs = outputNames();
+    std::string info = "OnnxPredict: Available output names are:\n";
+    for (std::vector<std::string>::const_iterator i = outputs.begin(); i != outputs.end() - 1; ++i) info += *i + ", ";
+    return info + outputs.back() + "\n\nReconfigure this algorithm with valid output names before starting the processing.";
+  }
+
+ public:
+
+  OnnxPredict() : _env(Ort::Env(ORT_LOGGING_LEVEL_WARNING, "multi_io_inference")),  // {"default", "test", "multi_io_inference"} - reused for all sessions
+                  _sessionOptions(Ort::SessionOptions()), _session(nullptr), _runOptions(nullptr), _isConfigured(false) {
+    declareInput(_poolIn, "poolIn", "the pool where to get the feature tensors");
+    declareOutput(_poolOut, "poolOut", "the pool where to store the output tensors");
+  }
+
+  ~OnnxPredict() {
+    all_input_infos.clear();
+    all_output_infos.clear();
+    _inputNodes.clear();
+    _outputNodes.clear();
+    input_tensors.clear();
+    input_names.clear();
+    output_names.clear();
+  }
+
+  void declareParameters() {
+    declareParameter("graphFilename", "the name of the file from which to load the ONNX model", "", "");
+    declareParameter("inputs", "will look for these namespaces in poolIn. Should match the names of the inputs in the ONNX model", "", Parameter::VECTOR_STRING);
+    declareParameter("outputs", "will save the tensors on the model outputs named after `outputs` to the same namespaces in the output pool. Set the first element of this list to an empty string to print all the available model outputs", "", Parameter::VECTOR_STRING);
+    declareParameter("squeeze", "remove singleton dimensions of the input tensors. Does not apply to the batch dimension", "{true,false}", true);
+    declareParameter("deviceId", "the GPU device id when CUDA support is available", "[0,inf)", 0);
+    declareParameter("optimizationLevel", "the ONNX graph optimization level to use", "{disable_all,basic,extended,all}", "extended");
+  }
+
+  void configure();
+  void compute();
+  void reset();
+
+  static const char* name;
+  static const char* category;
+  static const char* description;
+};
+
+} // namespace standard
+} // namespace essentia
+
+
+#include "streamingalgorithmwrapper.h"
+
+namespace essentia {
+namespace streaming {
+
+class OnnxPredict : public StreamingAlgorithmWrapper {
+
+ protected:
+  Sink<Pool> _poolIn;
+  Source<Pool> _poolOut;
+
+ public:
+  OnnxPredict() {
+    declareAlgorithm("OnnxPredict");
+    declareInput(_poolIn, TOKEN, "poolIn");
+    declareOutput(_poolOut, TOKEN, "poolOut");
+    _poolOut.setBufferType(BufferUsage::forSingleFrames);
+  }
+};
+
+} // namespace streaming
+} // namespace essentia
+
+#endif // ESSENTIA_ONNXPREDICT_H
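The `squeeze` and `optimizationLevel` parameters declared above are exercised by the unit tests at the end of this patch; a condensed, hypothetical sketch of how they are meant to be combined (identity2x2.onnx is the two-input identity model from the test/models submodule bumped below):

    # Hypothetical parameter sketch; mirrors the unit tests below.
    import numpy
    from essentia import Pool
    from essentia.standard import OnnxPredict

    pool = Pool()
    pool.set("input1", numpy.ones((3, 1, 1, 3), dtype="float32"))  # (batch, 1, 1, features)
    pool.set("input2", numpy.ones((3, 1, 1, 3), dtype="float32"))

    model = OnnxPredict(graphFilename="test/models/identity/identity2x2.onnx",
                        inputs=["input1", "input2"],
                        outputs=["output1", "output2"],
                        squeeze=True,               # drop singleton dims (but not the batch one) before inference
                        optimizationLevel="basic")  # one of: disable_all, basic, extended, all
    out = model(pool)                               # out["output1"], out["output2"] echo the inputs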
diff --git a/src/python/essentia/standard.py b/src/python/essentia/standard.py
index ea0e7d059..3ccc55203 100644
--- a/src/python/essentia/standard.py
+++ b/src/python/essentia/standard.py
@@ -71,7 +71,7 @@ def compute(self, *args):
 
         # we have to make some exceptions for YamlOutput and PoolAggregator
         # because they expect cpp Pools
-        if name in ('YamlOutput', 'PoolAggregator', 'SvmClassifier', 'PCA', 'GaiaTransform', 'TensorflowPredict'):
+        if name in ('YamlOutput', 'PoolAggregator', 'SvmClassifier', 'PCA', 'GaiaTransform', 'TensorflowPredict', 'OnnxPredict'):
            args = (args[0].cppPool,)
 
        # verify that all types match and do any necessary conversions
@@ -105,7 +105,7 @@ def compute(self, *args):
 
         # we have to make an exceptional case for YamlInput, because we need
         # to wrap the Pool that it outputs w/ our python Pool from common.py
-        if name in ('YamlInput', 'PoolAggregator', 'SvmClassifier', 'PCA', 'GaiaTransform', 'Extractor', 'TensorflowPredict'):
+        if name in ('YamlInput', 'PoolAggregator', 'SvmClassifier', 'PCA', 'GaiaTransform', 'Extractor', 'TensorflowPredict', 'OnnxPredict'):
            return _c.Pool(results)
 
        # MusicExtractor and FreesoundExtractor output two pools
diff --git a/src/wscript b/src/wscript
index ab86dc469..42b5127fb 100644
--- a/src/wscript
+++ b/src/wscript
@@ -21,7 +21,9 @@ lib_map = {
     'FFTW': 'fftw3f',
     'LIBCHROMAPRINT': 'libchromaprint',
     'GAIA2': 'gaia2',
-    'TENSORFLOW': 'tensorflow'}
+    'TENSORFLOW': 'tensorflow',
+    'ONNXRUNTIME': 'libonnxruntime',
+    }
 
 
 def options(ctx):
@@ -55,6 +57,9 @@ def options(ctx):
     ctx.add_option('--with-tensorflow', action='store_true',
                    dest='WITH_TENSORFLOW', default=False,
                    help='build with Tensorflow support')
+    ctx.add_option('--with-onnx', action='store_true',
+                   dest='WITH_ONNXRUNTIME', default=False,
+                   help='build with ONNX Runtime support')
     ctx.add_option('--lightweight', action='store',
                    dest='LIGHTWEIGHT', default=False,
                    help='build lightweight version with specified dependencies (comma separated: =' + ','.join(default_libs) + ')')
@@ -110,6 +115,9 @@ def configure(ctx):
     if ctx.env.WITH_TENSORFLOW:
         ctx.env.CHECK_LIBS.append('tensorflow')
 
+    if ctx.env.WITH_ONNXRUNTIME:
+        ctx.env.CHECK_LIBS.append('onnxruntime')
+
     if ctx.env.IGNORE_ALGOS:
         for a in ctx.env.IGNORE_ALGOS.split(","):
             a = a.strip()
@@ -201,6 +209,10 @@ def configure(ctx):
         ctx.check_cfg(package=lib_map['TENSORFLOW'], uselib_store='TENSORFLOW',
                       args=check_cfg_args, mandatory=True)
 
+    if 'onnxruntime' in ctx.env.CHECK_LIBS:
+        ctx.check_cfg(package=lib_map['ONNXRUNTIME'], uselib_store='ONNXRUNTIME',
+                      args=['libonnxruntime >= 1.21.1'] + check_cfg_args, mandatory=True)
+
     # needed by ffmpeg for the INT64_C macros
     ctx.env.DEFINES += ['__STDC_CONSTANT_MACROS']
 
@@ -341,6 +353,17 @@ def configure(ctx):
             print('  The following algorithms will be ignored: %s' % algos)
             ctx.env.ALGOIGNORE += algos
 
+
+    algos = [ 'OnnxPredict' ]
+    if has('onnxruntime'):
+        print('- ONNX Runtime detected!')
+        print('  The following algorithms will be included: %s\n' % algos)
+        ctx.env.USE_LIBS += ' ONNXRUNTIME'
+    else:
+        print('- Essentia is configured without ONNX Runtime.')
+        print('  The following algorithms will be ignored: %s' % algos)
+        ctx.env.ALGOIGNORE += algos
+
     lel = len(ctx.env.EXAMPLE_LIST)
     if lel:
         print('- Compiling %s example%s' % (lel, "" if lel == 1 else "s"))
diff --git a/test/audio b/test/audio
index 89df8e4d6..9d181685f 160000
--- a/test/audio
+++ b/test/audio
@@ -1 +1 @@
-Subproject commit 89df8e4d6fb306db6d2e14b6de6357aacda43b10
+Subproject commit 9d181685fe123624b976baf8918df335432bd2f1
diff --git a/test/models b/test/models
index 3ca4130bc..c298db07d 160000
--- a/test/models
+++ b/test/models
@@ -1 +1 @@
-Subproject commit 3ca4130bcb398a1361867e5d8462d3a7a0c02ccd
+Subproject commit c298db07de13e79d9b1ab892b074ccad4cc10c20
diff --git a/test/src/unittests/machinelearning/test_onnxpredict.py b/test/src/unittests/machinelearning/test_onnxpredict.py
new file mode 100644
index 000000000..cf6ef8e67
--- /dev/null
+++ b/test/src/unittests/machinelearning/test_onnxpredict.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2006-2021 Music Technology Group - Universitat Pompeu Fabra
+#
+# This file is part of Essentia
+#
+# Essentia is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation (FSF), either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the Affero GNU General Public License
+# version 3 along with this program. If not, see http://www.gnu.org/licenses/
+
+
+from essentia_test import *
+import sys
+import os
+from pathlib import Path
+import soundfile as sf
+
+
+class TestOnnxPredict(TestCase):
+
+    def testIONameParser(self):
+        model = join(testdata.models_dir, "effnetdiscogs", "effnetdiscogs-bsdynamic-1.onnx")
+        print(f"\nmodel: {model}")
+        configs = [
+            {
+                "graphFilename": model,
+                "inputs": ["model/Placeholder"],
+                "outputs": ["model/Softmax:"],
+            },  # No index.
+            {
+                "graphFilename": model,
+                "inputs": ["model/Placeholder"],
+                "outputs": ["model/Softmax:3"],
+            },  # Index out of bounds.
+            {
+                "graphFilename": model,
+                "inputs": ["model/Placeholder"],
+                "outputs": ["model/Softmax::0"],
+            },  # Double colon.
+            {
+                "graphFilename": model,
+                "inputs": ["model/Placeholder"],
+                "outputs": ["model/Softmax:s:0"],
+            },  # Several colons.
+        ]
+
+        for config in configs[1:]:
+            with self.subTest(f"{config} failed"):
+                print(config)
+                self.assertConfigureFails(OnnxPredict(), config)
+
+    def testInference(self):
+
+        # define output metadata
+        outputs = [
+            {
+                "name": "activations",
+                "shape": (1, 400),
+            },
+            {
+                "name": "embeddings",
+                "shape": (1, 1280),
+            }
+        ]
+
+        onnx_predict = OnnxPredict()
+        pool = Pool()
+
+        pool_out = runEffnetDiscogsInference(onnx_predict, outputs, pool)
+
+        self.assertEqualVector(outputs[0]["shape"], pool_out[outputs[0]["name"]].shape[2:])
+        self.assertEqualVector(outputs[1]["shape"], pool_out[outputs[1]["name"]].shape[2:])
+        self.assertEqual(pool_out.descriptorNames()[0], outputs[0]["name"])
+        self.assertEqual(pool_out.descriptorNames()[1], outputs[1]["name"])
+
+    def testEmptyModelName(self):
+        # With an empty model name the algorithm should skip the configuration without errors.
+        self.assertConfigureSuccess(OnnxPredict(), {})
+        self.assertConfigureSuccess(OnnxPredict(), {"graphFilename": ""})
+        self.assertConfigureSuccess(
+            OnnxPredict(), {"graphFilename": "", "inputs": [""]}
+        )
+        self.assertConfigureSuccess(
+            OnnxPredict(), {"graphFilename": "", "inputs": ["wrong_input"]}
+        )
+
+    def testInvalidParam(self):
+        model = join(testdata.models_dir, "effnetdiscogs", "effnetdiscogs-bsdynamic-1.onnx")
+        self.assertConfigureFails(
+            OnnxPredict(),
+            {
+                "graphFilename": model,
+                "inputs": ["wrong_input_name"],
+                "outputs": ["embeddings"],
+            },
+        )  # input does not exist in the model
+        self.assertConfigureFails(
+            OnnxPredict(),
+            {
+                "graphFilename": "wrong_model_name",  #! I suspect the issue is here with OnnxExceptions
+                "inputs": ["melspectrogram"],
+                "outputs": ["embeddings"],
+            },
+        )  # the model does not exist
+
+    def testIdentityModel(self):
+
+        # prepare model inputs and batches
+        input1, input2 = (numpy.float32(numpy.random.random((3, 3))) for _ in range(2))
+
+        n, m = input1.shape
+
+        batch1 = input1.reshape(n, 1, 1, m)
+        batch2 = input2.reshape(n, 1, 1, m)
+
+        pool = Pool()
+        pool.set("input1", batch1)
+        pool.set("input2", batch2)
+
+        found_values1, found_values2 = runIdentityModelInference(OnnxPredict(), pool)
+
+        self.assertAlmostEqualMatrix(found_values1, batch1)
+        self.assertAlmostEqualMatrix(found_values2, batch2)
+
+    def testComputeWithoutConfiguration(self):
+        pool = Pool()
+        pool.set("melspectrogram", numpy.zeros((1, 1, 1, 1), dtype="float32"))
+
+        self.assertComputeFails(OnnxPredict(), pool)
+
+    def testIgnoreInvalidReconfiguration(self):
+        pool = Pool()
+        pool.set("input1", numpy.ones((1, 1, 1, 3), dtype="float32"))
+        pool.set("input2", numpy.ones((1, 1, 1, 3), dtype="float32"))
+
+        model_name = join(testdata.models_dir, "identity", "identity2x2.onnx")
+        model = OnnxPredict(
+            graphFilename=model_name,
+            inputs=["input1", "input2"],
+            outputs=["output1"],
+            squeeze=True,
+        )
+
+        firstResult = model(pool)
+
+        # This attempt to reconfigure the algorithm should be ignored and trigger a warning.
+        model.configure()
+
+        secondResult = model(pool)
+
+        self.assertEqualMatrix(firstResult["output1"], secondResult["output1"])
+
+    def testInvalidSqueezeConfiguration(self):
+        model = join(testdata.models_dir, "identity", "identity2x2.onnx")
+
+        # prepare model inputs and batches
+        input1, input2 = (numpy.float32(numpy.random.random((3, 3))) for _ in range(2))
+
+        n, m = input1.shape
+
+        batch1 = input1.reshape(n, 1, 1, m)
+        batch2 = input2.reshape(n, 1, 1, m)
+
+        pool = Pool()
+        pool.set("input1", batch1)
+        pool.set("input2", batch2)
+
+        onnx_predict = OnnxPredict(
+            graphFilename=model,
+            inputs=["input1", "input2"],
+            outputs=["output1", "output2"],
+            squeeze=False,
+        )
+        self.assertComputeFails(onnx_predict, pool)
+
+    def testConfigure(self):
+        # define output metadata
+        outputs = [
+            {
+                "name": "activations",
+                "shape": (1, 400),
+            },
+            {
+                "name": "embeddings",
+                "shape": (1, 1280),
+            }
+        ]
+
+        onnx_predict = OnnxPredict()
+        pool = Pool()
+
+        _ = runEffnetDiscogsInference(onnx_predict, outputs, pool)
+        pool.clear()
+
+        # prepare model inputs and batches for the identity model
+        input1, input2 = (numpy.float32(numpy.random.random((3, 3))) for _ in range(2))
+
+        n, m = input1.shape
+
+        batch1 = input1.reshape(n, 1, 1, m)
+        batch2 = input2.reshape(n, 1, 1, m)
+
+        pool.set("input1", batch1)
+        pool.set("input2", batch2)
+
+        found_values1, found_values2 = runIdentityModelInference(onnx_predict, pool)
+
+        self.assertAlmostEqualMatrix(found_values1, batch1)
+        self.assertAlmostEqualMatrix(found_values2, batch2)
+
+    def test_default_optimization_level(self):
+        """Check that the default optimization level is 'extended'."""
+        onnx = OnnxPredict()
+        # Create a minimal pool with dummy inputs
+        n, m = (3 for _ in range(2))
+        batch1 = ones((n, 1, 1, m))
+        batch2 = ones((n, 1, 1, m))
+        pool = Pool()
+        pool.set("input1", batch1)
+        pool.set("input2", batch2)
+
+        # Run inference to fully configure OnnxPredict
+        runIdentityModelInference(onnx, pool)
+        self.assertEqual(onnx.paramValue("optimizationLevel"), "extended")
+
+    def test_set_valid_optimization_levels(self):
+        """Check that valid optimization levels can be set without errors."""
+
+        n = 3  # n = batch size, m = feature dimension
+        input1 = array([0, 1, 2])
+        input2 = array([3, 4, 5])  # different values
+        batch1 = input1.reshape(1, n, 1, 1)
+        batch2 = input2.reshape(1, n, 1, 1)
+
+        pool = Pool()
+        pool.set("input1", batch1)
+        pool.set("input2", batch2)
+        valid_levels = ["disable_all", "basic", "extended", "all"]
+
+        for level in valid_levels:
+            out1, out2 = runIdentityModelInference(OnnxPredict(), pool, level)
+            # verify outputs match inputs
+            self.assertEqualVector(out1.flatten(), input1)
+            self.assertEqualVector(out2.flatten(), input2)
+
+    def test_set_invalid_optimization_level(self):
+        """Check that invalid optimization levels raise an error."""
+        # We don't need a pool because the configuration itself should fail
+        with self.assertRaises(RuntimeError):
+            OnnxPredict(optimizationLevel="super_extended")
+
+
+def runEffnetDiscogsInference(onnx_predict, outputs, pool) -> Pool:
+    model_path = join(testdata.models_dir, "effnetdiscogs", "effnetdiscogs-bsdynamic-1.onnx")
+
+    stem = "359500__mtg__sax-tenor-e-major"
+    audio_path = join(testdata.audio_dir, Path("recorded"), f"{stem}.wav")
+
+    audio, _ = sf.read(audio_path, dtype=numpy.float32)
+
+    onnx_predict.configure(
+        graphFilename=model_path,
+        inputs=["melspectrogram"],
+        outputs=[output["name"] for output in outputs],
+    )
+
+    frame_size = 512
+    hop_size = 256
+    patch_size = 128
+    number_bands = 96
+
+    w = Windowing(type="hann", zeroPadding=frame_size)
+    spectrum = Spectrum(size=frame_size)
+    mels = MelBands(inputSize=frame_size + 1, numberBands=number_bands, type="magnitude")
+    logNorm = UnaryOperator(type="log")
+
+    # compute mel bands
+    bands = []
+    for frame in FrameGenerator(audio, frameSize=frame_size, hopSize=hop_size):
+        melFrame = mels(spectrum(w(frame)))
+        bands.append(logNorm(melFrame))
+    bands = array(bands)
+
+    discard = bands.shape[0] % patch_size
+    bands = numpy.reshape(bands[:-discard, :], [-1, patch_size, number_bands])
+    batch = numpy.expand_dims(bands, 1)
+
+    pool.set("melspectrogram", batch)
+
+    return onnx_predict(pool)
+
+
+def runIdentityModelInference(onnx_predict, pool, optimizationLevel="extended"):
+    model_path = join(testdata.models_dir, "identity", "identity2x2.onnx")
+
+    onnx_predict.configure(
+        graphFilename=model_path,
+        inputs=["input1", "input2"],
+        outputs=["output1", "output2"],
+        squeeze=True,
+        optimizationLevel=optimizationLevel,
+    )
+
+    poolOut = onnx_predict(pool)
+
+    return poolOut["output1"], poolOut["output2"]
+
+
+suite = allTests(TestOnnxPredict)
+
+if __name__ == "__main__":
+    TextTestRunner(verbosity=2).run(suite)
diff --git a/wscript b/wscript
index a48d304c8..f9ed031bc 100644
--- a/wscript
+++ b/wscript
@@ -92,6 +92,7 @@ def configure(ctx):
     ctx.env.PKG_CONFIG_PATH = ctx.options.PKG_CONFIG_PATH
     ctx.env.WITH_GAIA = ctx.options.WITH_GAIA
     ctx.env.WITH_TENSORFLOW = ctx.options.WITH_TENSORFLOW
+    ctx.env.WITH_ONNXRUNTIME = ctx.options.WITH_ONNXRUNTIME
     ctx.env.LIGHTWEIGHT = ctx.options.LIGHTWEIGHT
     ctx.env.EXAMPLES = ctx.options.EXAMPLES
     ctx.env.EXAMPLE_LIST = []
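For reference, the test helpers above boil down to the following end-to-end usage sketch (hypothetical, not part of the patch; the model path and node names follow the unit tests, and the zero-filled batch stands in for real log-mel patches shaped (batch, 1, patch_size, number_bands)):

    # Hypothetical end-to-end sketch for the new OnnxPredict algorithm.
    import numpy
    from essentia import Pool
    from essentia.standard import OnnxPredict

    model = OnnxPredict(graphFilename="test/models/effnetdiscogs/effnetdiscogs-bsdynamic-1.onnx",
                        inputs=["melspectrogram"],
                        outputs=["activations", "embeddings"])

    batch = numpy.zeros((1, 1, 128, 96), dtype="float32")  # replace with real log-mel patches

    pool = Pool()
    pool.set("melspectrogram", batch)

    out = model(pool)  # an essentia Pool, thanks to the standard.py special-casing above
    print(out["activations"].shape)  # trailing dims should be (1, 400) for this model
    print(out["embeddings"].shape)   # trailing dims should be (1, 1280)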