Skip to content

Commit 4d1963c

Browse files
sfatimarsspintelpreetha-intel
authored
OpenVINO EP Rel 1.18 Changes (microsoft#20337)
### Description These changes include: support for OpenVINO 2024.1; importing precompiled blobs with an EPContext blob; separating device and precision as inputs; deprecating the CPU_FP32 and GPU_FP32 terminology and introducing CPU and GPU; and, for AUTO GPU, CPU, creating only a GPU blob and not a CPU blob. ### Motivation and Context - OpenVINO 2024.1 will be out soon. - Importing a precompiled blob can greatly reduce FEIL/FIL time. - Separating device and precision will make the input cleaner. --------- Co-authored-by: Suryaprakash Shanmugam <[email protected]> Co-authored-by: Preetha Veeramalai <[email protected]>
1 parent 9001c69 commit 4d1963c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+827
-710
lines changed

Diff for: cmake/CMakeLists.txt

+8-26
Original file line numberDiff line numberDiff line change
@@ -1325,43 +1325,25 @@ if (onnxruntime_USE_OPENVINO)
13251325

13261326
add_definitions(-DUSE_OPENVINO=1)
13271327

1328-
if (onnxruntime_USE_OPENVINO_GPU_FP32)
1329-
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
1328+
if (onnxruntime_USE_OPENVINO_GPU)
1329+
add_definitions(-DOPENVINO_CONFIG_GPU=1)
13301330
endif()
13311331

1332-
if (onnxruntime_USE_OPENVINO_GPU_FP16)
1333-
add_definitions(-DOPENVINO_CONFIG_GPU_FP16=1)
1334-
endif()
1335-
1336-
if (onnxruntime_USE_OPENVINO_CPU_FP32)
1337-
add_definitions(-DOPENVINO_CONFIG_CPU_FP32=1)
1338-
endif()
1339-
1340-
if (onnxruntime_USE_OPENVINO_CPU_FP16)
1341-
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
1332+
if (onnxruntime_USE_OPENVINO_CPU)
1333+
add_definitions(-DOPENVINO_CONFIG_CPU=1)
13421334
endif()
13431335

13441336
if (onnxruntime_USE_OPENVINO_NPU)
13451337
add_definitions(-DOPENVINO_CONFIG_NPU=1)
13461338
endif()
13471339

1348-
if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
1349-
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
1350-
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
1351-
endif()
1352-
1353-
if (onnxruntime_USE_OPENVINO_GPU_FP16_NP)
1354-
add_definitions(-DOPENVINO_CONFIG_GPU_FP16=1)
1355-
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
1356-
endif()
1357-
1358-
if (onnxruntime_USE_OPENVINO_CPU_FP32_NP)
1359-
add_definitions(-DOPENVINO_CONFIG_CPU_FP32=1)
1340+
if (onnxruntime_USE_OPENVINO_GPU_NP)
1341+
add_definitions(-DOPENVINO_CONFIG_GPU=1)
13601342
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
13611343
endif()
13621344

1363-
if (onnxruntime_USE_OPENVINO_CPU_FP16_NP)
1364-
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
1345+
if (onnxruntime_USE_OPENVINO_CPU_NP)
1346+
add_definitions(-DOPENVINO_CONFIG_CPU=1)
13651347
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
13661348
endif()
13671349

Diff for: dockerfiles/Dockerfile.openvino

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ ENV WORKDIR_PATH=/home/openvino
1313
WORKDIR $WORKDIR_PATH
1414
ENV DEBIAN_FRONTEND noninteractive
1515

16-
ARG DEVICE=CPU_FP32
16+
ARG DEVICE=CPU
1717
ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime.git
1818
ARG ONNXRUNTIME_BRANCH=main
1919

Diff for: onnxruntime/core/providers/openvino/backend_manager.cc

+65-12
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
// Licensed under the MIT License
33

44
#include <fstream>
5+
#include <sstream>
56
#include <utility>
6-
#include <exception>
77

88
#include "core/providers/shared_library/provider_api.h"
9-
#include "contexts.h"
10-
#include "backend_manager.h"
11-
#include "ibackend.h"
12-
#include "backend_utils.h"
9+
#include "core/providers/openvino/contexts.h"
10+
#include "core/providers/openvino/backend_manager.h"
11+
#include "core/providers/openvino/ibackend.h"
12+
#include "core/providers/openvino/backend_utils.h"
1313

1414
namespace onnxruntime {
1515
namespace openvino_ep {
@@ -21,8 +21,17 @@ GlobalContext& BackendManager::GetGlobalContext() {
2121
BackendManager::BackendManager(const GlobalContext& global_context,
2222
const onnxruntime::Node& fused_node,
2323
const onnxruntime::GraphViewer& subgraph,
24-
const logging::Logger& logger) {
24+
const logging::Logger& logger,
25+
EPCtxHandler& ctx_handle) {
2526
global_context_ = global_context;
27+
ep_ctx_handle_ = ctx_handle;
28+
29+
openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." +
30+
std::to_string(global_context_.OpenVINO_Version.at(1));
31+
if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) {
32+
if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK())
33+
ORT_THROW("Import blob from model failed");
34+
}
2635

2736
auto prec_str = GetGlobalContext().precision_str;
2837

@@ -66,7 +75,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
6675
try {
6776
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
6877
GetGlobalContext(),
69-
subgraph_context_);
78+
subgraph_context_,
79+
ep_ctx_handle_);
7080
} catch (std::string const& msg) {
7181
ORT_THROW(msg);
7282
}
@@ -85,7 +95,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
8595
try {
8696
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
8797
GetGlobalContext(),
88-
subgraph_context_);
98+
subgraph_context_,
99+
ep_ctx_handle_);
89100
} catch (const OnnxRuntimeException& ex) {
90101
if (device_type.find("NPU") != std::string::npos) {
91102
LOGS_DEFAULT(WARNING) << ex.what();
@@ -96,7 +107,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
96107
try {
97108
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
98109
GetGlobalContext(),
99-
subgraph_context_);
110+
subgraph_context_,
111+
ep_ctx_handle_);
100112
} catch (std::string const& msg) {
101113
ORT_THROW(msg);
102114
}
@@ -107,6 +119,45 @@ BackendManager::BackendManager(const GlobalContext& global_context,
107119
}
108120
}
109121

122+
// Call EPContext model exporter here if the provider option for exporting
123+
// precompiled blob is set. If that's the case:
124+
// By default, create model in embed mode where the blob stream is exported as data within
125+
// the EPContext node.
126+
Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer,
127+
const logging::Logger& logger) {
128+
std::string model_blob_str;
129+
auto compiled_model = concrete_backend_->GetOVCompiledModel();
130+
auto graph_name = global_context_.onnx_model_path_name;
131+
// Remove extension so we can append suffix to form the complete name of output graph
132+
graph_name = [&]() {
133+
size_t dot = graph_name.find_last_of(".");
134+
if (dot == std::string::npos) return graph_name;
135+
return graph_name.substr(0, dot);
136+
}();
137+
// If embed_mode, then pass on the serialized blob
138+
// If not embed_mode, dump the blob here and only pass on the path to the blob
139+
if (global_context_.ep_context_embed_mode) {
140+
std::ostringstream model_blob_stream;
141+
compiled_model.export_model(model_blob_stream);
142+
model_blob_str = model_blob_stream.str();
143+
ORT_ENFORCE(model_blob_str.size() != 0);
144+
} else {
145+
std::ofstream f(graph_name + ".blob", std::ios::out | std::ios::trunc | std::ios::binary);
146+
compiled_model.export_model(f);
147+
model_blob_str = graph_name + ".blob";
148+
}
149+
150+
ORT_RETURN_IF_ERROR(ep_ctx_handle_.ExportEPCtxModel(graph_body_viewer,
151+
graph_name,
152+
logger,
153+
global_context_.ep_context_embed_mode,
154+
model_blob_str,
155+
openvino_sdk_version_,
156+
GetGlobalContext().device_type));
157+
158+
return Status::OK();
159+
}
160+
110161
bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
111162
bool has_batched_inputs = true;
112163

@@ -182,7 +233,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
182233
return model_proto;
183234
}
184235

185-
std::vector<std::vector<int64_t>> GetInputTensorShapes(Ort::KernelContext& context) {
236+
std::vector<std::vector<int64_t>> GetInputTensorShapes(const Ort::KernelContext& context) {
186237
const auto input_count = context.GetInputCount();
187238
std::vector<std::vector<int64_t>> input_shapes;
188239
input_shapes.reserve(input_count);
@@ -289,7 +340,8 @@ void BackendManager::Compute(OrtKernelContext* context) {
289340
try {
290341
dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes,
291342
GetGlobalContext(),
292-
subgraph_context_);
343+
subgraph_context_,
344+
ep_ctx_handle_);
293345
} catch (const OnnxRuntimeException& ex) {
294346
if (GetGlobalContext().device_type.find("NPU") != std::string::npos) {
295347
LOGS_DEFAULT(WARNING) << ex.what();
@@ -301,7 +353,8 @@ void BackendManager::Compute(OrtKernelContext* context) {
301353
try {
302354
dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes,
303355
GetGlobalContext(),
304-
subgraph_context_);
356+
subgraph_context_,
357+
ep_ctx_handle_);
305358
} catch (std::string const& msg) {
306359
ORT_THROW(msg);
307360
}

Diff for: onnxruntime/core/providers/openvino/backend_manager.h

+10-4
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@
88
#include <memory>
99
#include <string>
1010

11-
#include "ov_interface.h"
12-
#include "contexts.h"
13-
#include "ibackend.h"
11+
#include "core/providers/openvino/ov_interface.h"
12+
#include "core/providers/openvino/contexts.h"
13+
#include "core/providers/openvino/onnx_ctx_model_helper.h"
14+
#include "core/providers/openvino/ibackend.h"
1415

1516
namespace onnxruntime {
1617
namespace openvino_ep {
@@ -21,11 +22,14 @@ class BackendManager {
2122
BackendManager(const GlobalContext& global_context,
2223
const onnxruntime::Node& fused_node,
2324
const onnxruntime::GraphViewer& subgraph,
24-
const logging::Logger& logger);
25+
const logging::Logger& logger,
26+
EPCtxHandler& ctx_handle);
2527
void Compute(OrtKernelContext* context);
2628
void ShutdownBackendManager();
2729
void SetGlobalCotext(const GlobalContext& global_context);
2830
GlobalContext& GetGlobalContext();
31+
Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph,
32+
const logging::Logger& logger);
2933

3034
private:
3135
std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
@@ -47,6 +51,8 @@ class BackendManager {
4751
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;
4852
SubGraphContext subgraph_context_;
4953
GlobalContext global_context_;
54+
EPCtxHandler ep_ctx_handle_{};
55+
std::string openvino_sdk_version_{};
5056
};
5157

5258
} // namespace openvino_ep

Diff for: onnxruntime/core/providers/openvino/backend_utils.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
#include <sstream>
66
#include <fstream>
77

8-
#include "ov_interface.h"
98
#include "openvino/pass/convert_fp32_to_fp16.hpp"
109
#include "openvino/pass/constant_folding.hpp"
1110
#include "core/providers/shared_library/provider_api.h"
12-
#include "backend_utils.h"
11+
#include "core/providers/openvino/backend_utils.h"
12+
#include "core/providers/openvino/ov_interface.h"
1313

1414
using Exception = ov::Exception;
1515

Diff for: onnxruntime/core/providers/openvino/backend_utils.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
#include <string>
1313

1414
#include "core/session/onnxruntime_cxx_api.h"
15-
#include "contexts.h"
16-
#include "ov_interface.h"
15+
#include "core/providers/openvino/contexts.h"
16+
#include "core/providers/openvino/ov_interface.h"
1717
#ifdef _WIN32
1818
#include <direct.h>
1919
#define GetCurrentDir _getcwd

Diff for: onnxruntime/core/providers/openvino/backends/backend_factory.cc

+4-3
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@
55
#include "core/providers/shared_library/provider_api.h"
66
#include "core/providers/openvino/contexts.h"
77
#include "core/providers/openvino/ibackend.h"
8-
#include "basic_backend.h"
8+
#include "core/providers/openvino/backends/basic_backend.h"
99

1010
namespace onnxruntime {
1111
namespace openvino_ep {
1212

1313
std::shared_ptr<IBackend>
1414
BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
1515
GlobalContext& global_context,
16-
const SubGraphContext& subgraph_context) {
16+
const SubGraphContext& subgraph_context,
17+
EPCtxHandler& ep_ctx_handle) {
1718
std::string type = global_context.device_type;
1819
if (type == "CPU" || type.find("GPU") != std::string::npos ||
1920
type.find("NPU") != std::string::npos ||
@@ -22,7 +23,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
2223
type.find("AUTO") != std::string::npos) {
2324
std::shared_ptr<IBackend> concrete_backend_;
2425
try {
25-
concrete_backend_ = std::make_shared<BasicBackend>(model_proto, global_context, subgraph_context);
26+
concrete_backend_ = std::make_shared<BasicBackend>(model_proto, global_context, subgraph_context, ep_ctx_handle);
2627
} catch (std::string const& msg) {
2728
ORT_THROW(msg);
2829
}

0 commit comments

Comments
 (0)