Skip to content

Commit 3ca0979

Browse files
committed
Address review comment: Remove duplicate NPU fallback logic
Per MayureshV1's review comment, the NPU->CPU fallback logic was moved to basic_backend.cc in PR #723. This commit removes the duplicate implementation from the BackendManager constructor in backend_manager.cc, which avoids conflicts between the two copies and keeps the fallback handling in a single place.
1 parent 49fe713 commit 3ca0979

File tree

3 files changed

+456
-479
lines changed

3 files changed

+456
-479
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -159,39 +159,11 @@ BackendManager::BackendManager(SessionContext& session_context,
159159
subgraph_context_.has_dynamic_input_shape = false;
160160

161161
// OV NPU plugin is supported with fallback to OV CPU upon compilation failures (the fallback itself is handled in basic_backend.cc).
162-
try {
163-
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
164-
session_context_,
165-
subgraph_context_,
166-
shared_context_,
167-
model_stream);
168-
} catch (const ovep_exception& ex) {
169-
#ifndef OPENVINO_DISABLE_NPU_FALLBACK
170-
bool eligible_for_cpu_fallback = session_context_.device_type.find("NPU") != std::string::npos &&
171-
!session_context_.so_disable_cpu_ep_fallback &&
172-
!subgraph_context_.is_ep_ctx_graph;
173-
if (eligible_for_cpu_fallback) {
174-
std::string exception_str = ex.what();
175-
LOGS_DEFAULT(VERBOSE) << exception_str;
176-
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
177-
<< "Falling back to OV CPU for execution";
178-
session_context_.device_type = "CPU";
179-
session_context_.precision = "FP32";
180-
try {
181-
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
182-
session_context_,
183-
subgraph_context_,
184-
shared_context_,
185-
model_stream);
186-
} catch (std::string const& msg) {
187-
ORT_THROW(msg);
188-
}
189-
} else
190-
#endif
191-
{
192-
throw ex;
193-
}
194-
}
162+
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
163+
session_context_,
164+
subgraph_context_,
165+
shared_context_,
166+
model_stream);
195167
}
196168
if (session_context_.so_context_enable &&
197169
(subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) {

onnxruntime/core/providers/openvino/openvino_execution_provider.cc

Lines changed: 108 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -105,133 +105,133 @@ common::Status OpenVINOExecutionProvider::Compile(
105105

106106
try {
107107
bool is_epctx_model = false;
108-
if (!fused_nodes.empty()) {
109-
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
110-
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
111-
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
112-
session_context_.onnx_opset_version =
113-
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
114-
115-
// OVIR wrapped in epctx should be treated as source but this code does not
116-
// This corner case is not in use and will be addressed in a future commit
117-
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
118-
}
119-
120-
// The block below is executed during EP context model inference
121-
auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory
122-
if (session_context_.so_share_ep_contexts &&
123-
is_epctx_model &&
124-
metadata.empty()) {
125-
fs::path context_model_file_path = session_context_.so_context_file_path;
126-
if (context_model_file_path.empty()) {
127-
// If ep.context_file_path is not set the input model path is used
128-
context_model_file_path = session_context_.onnx_model_path_name;
129-
}
108+
if (!fused_nodes.empty()) {
109+
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
110+
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
111+
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
112+
session_context_.onnx_opset_version =
113+
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
114+
115+
// OVIR wrapped in epctx should be treated as source but this code does not
116+
// This corner case is not in use and will be addressed in a future commit
117+
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
118+
}
130119

131-
// Metadata is always read from model location, this could be a source or epctx model
132-
fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
133-
fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
134-
std::ifstream file(metadata_file_path, std::ios::binary);
135-
ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
136-
shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
137-
file >> metadata;
120+
// The block below is executed during EP context model inference
121+
auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory
122+
if (session_context_.so_share_ep_contexts &&
123+
is_epctx_model &&
124+
metadata.empty()) {
125+
fs::path context_model_file_path = session_context_.so_context_file_path;
126+
if (context_model_file_path.empty()) {
127+
// If ep.context_file_path is not set the input model path is used
128+
context_model_file_path = session_context_.onnx_model_path_name;
138129
}
139130

140-
struct OpenVINOEPFunctionState {
141-
AllocateFunc allocate_func = nullptr;
142-
DestroyFunc destroy_func = nullptr;
143-
AllocatorHandle allocator_handle = nullptr;
144-
BackendManager& backend_manager;
145-
};
131+
// Metadata is always read from model location, this could be a source or epctx model
132+
fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
133+
fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
134+
std::ifstream file(metadata_file_path, std::ios::binary);
135+
ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
136+
shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
137+
file >> metadata;
138+
}
146139

147-
for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
148-
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
149-
const Node& fused_node = fused_node_graph.fused_node;
150-
151-
NodeComputeInfo compute_info;
152-
153-
// During backend creation, we check if user wants to use precompiled blob onnx model or the original model
154-
// For precompiled blob, directly load the model instead of compiling the model
155-
// For original model, check if the user wants to export a model with pre-compiled blob
156-
157-
auto& backend_manager = backend_managers_.emplace_back(session_context_,
158-
*shared_context_,
159-
fused_node,
160-
graph_body_viewer,
161-
logger,
162-
ep_ctx_handle_);
163-
164-
compute_info.create_state_func =
165-
[&backend_manager](ComputeContext* context, FunctionState* state) {
166-
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
167-
.allocate_func = context->allocate_func,
168-
.destroy_func = context->release_func,
169-
.allocator_handle = context->allocator_handle,
170-
.backend_manager = backend_manager};
171-
*state = static_cast<FunctionState>(p);
172-
return 0;
173-
};
174-
175-
compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
176-
auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
177-
try {
178-
function_state->backend_manager.Compute(context);
179-
} catch (const std::exception& ex) {
180-
return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
181-
}
182-
return Status::OK();
183-
};
140+
struct OpenVINOEPFunctionState {
141+
AllocateFunc allocate_func = nullptr;
142+
DestroyFunc destroy_func = nullptr;
143+
AllocatorHandle allocator_handle = nullptr;
144+
BackendManager& backend_manager;
145+
};
146+
147+
for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
148+
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
149+
const Node& fused_node = fused_node_graph.fused_node;
150+
151+
NodeComputeInfo compute_info;
152+
153+
// During backend creation, we check if user wants to use precompiled blob onnx model or the original model
154+
// For precompiled blob, directly load the model instead of compiling the model
155+
// For original model, check if the user wants to export a model with pre-compiled blob
156+
157+
auto& backend_manager = backend_managers_.emplace_back(session_context_,
158+
*shared_context_,
159+
fused_node,
160+
graph_body_viewer,
161+
logger,
162+
ep_ctx_handle_);
163+
164+
compute_info.create_state_func =
165+
[&backend_manager](ComputeContext* context, FunctionState* state) {
166+
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
167+
.allocate_func = context->allocate_func,
168+
.destroy_func = context->release_func,
169+
.allocator_handle = context->allocator_handle,
170+
.backend_manager = backend_manager};
171+
*state = static_cast<FunctionState>(p);
172+
return 0;
173+
};
174+
175+
compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
176+
auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
177+
try {
178+
function_state->backend_manager.Compute(context);
179+
} catch (const std::exception& ex) {
180+
return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
181+
}
182+
return Status::OK();
183+
};
184184

185-
compute_info.release_state_func =
186-
[](FunctionState state) {
187-
if (state) {
188-
OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
189-
delete function_state;
190-
}
191-
};
185+
compute_info.release_state_func =
186+
[](FunctionState state) {
187+
if (state) {
188+
OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
189+
delete function_state;
190+
}
191+
};
192192

193-
node_compute_funcs.push_back(std::move(compute_info));
193+
node_compute_funcs.push_back(std::move(compute_info));
194194

195-
if (!status.IsOK()) {
196-
break;
197-
}
195+
if (!status.IsOK()) {
196+
break;
198197
}
198+
}
199199

200-
// The block below is executed during EP context model generation
201-
if (session_context_.so_context_enable &&
202-
session_context_.so_share_ep_contexts &&
203-
!metadata.empty()) {
204-
// For models after the first the metadata name comes from the shared context
205-
fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
200+
// The block below is executed during EP context model generation
201+
if (session_context_.so_context_enable &&
202+
session_context_.so_share_ep_contexts &&
203+
!metadata.empty()) {
204+
// For models after the first the metadata name comes from the shared context
205+
fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
206+
if (metadata_file_path.empty()) {
207+
metadata_file_path = session_context_.so_context_file_path;
208+
std::string name_append{"_metadata.bin"};
206209
if (metadata_file_path.empty()) {
207-
metadata_file_path = session_context_.so_context_file_path;
208-
std::string name_append{"_metadata.bin"};
209-
if (metadata_file_path.empty()) {
210-
metadata_file_path = session_context_.onnx_model_path_name;
211-
name_append = "_ctx" + name_append;
212-
}
213-
auto metadata_filename = metadata_file_path.stem().string() + name_append;
214-
metadata_file_path.replace_filename(metadata_filename);
215-
shared_context_->shared_weights.metadata_filepath = metadata_file_path;
210+
metadata_file_path = session_context_.onnx_model_path_name;
211+
name_append = "_ctx" + name_append;
216212
}
217-
218-
// Metadata is generated only for shared contexts
219-
// If saving metadata then save it to the provided path or use the original model path
220-
// Multiple calls to Compile() will update the metadata and for the last call
221-
// the resulting file will contain the aggregated content
222-
std::ofstream file{metadata_file_path, std::ios::binary};
223-
ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
224-
file << metadata;
213+
auto metadata_filename = metadata_file_path.stem().string() + name_append;
214+
metadata_file_path.replace_filename(metadata_filename);
215+
shared_context_->shared_weights.metadata_filepath = metadata_file_path;
225216
}
226-
} catch (const ovep_exception& ex) {
227-
status = ex;
217+
218+
// Metadata is generated only for shared contexts
219+
// If saving metadata then save it to the provided path or use the original model path
220+
// Multiple calls to Compile() will update the metadata and for the last call
221+
// the resulting file will contain the aggregated content
222+
std::ofstream file{metadata_file_path, std::ios::binary};
223+
ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
224+
file << metadata;
228225
}
229226

230227
if (session_context_.so_stop_share_ep_contexts) {
231228
if (shared_context_) {
232229
shared_context_->clear();
233230
}
234231
}
232+
} catch (const ovep_exception& ex) {
233+
status = ex;
234+
}
235235

236236
return status;
237237
}

0 commit comments

Comments
 (0)