Address review comment: Remove duplicate NPU fallback logic

ankitm3k · ankitm3k · commit 3ca097908afa · 2025-11-11T17:39:26.000+05:30
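Per MayureshV1's review comment, the NPU->CPU fallback logic was moved to basic_backend.cc in PR #723. This commit removes the duplicate implementation from the BackendManager constructor in backend_manager.cc, so that the fallback lives in a single place and the two copies cannot conflict. It also touches OpenVINOExecutionProvider::Compile in openvino_execution_provider.cc: the body of the try block is de-indented and the catch (const ovep_exception&) clause is moved below the so_stop_share_ep_contexts cleanup, so that cleanup now sits inside the try block and is skipped when an ovep_exception is thrown earlier in Compile.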
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -159,39 +159,11 @@ BackendManager::BackendManager(SessionContext& session_context,
     subgraph_context_.has_dynamic_input_shape = false;
 
     // OV NPU plugin is supported with fallback to OV CPU upon compilation failures.
-    try {
-      concrete_backend_ = BackendFactory::MakeBackend(model_proto,
-                                                      session_context_,
-                                                      subgraph_context_,
-                                                      shared_context_,
-                                                      model_stream);
-    } catch (const ovep_exception& ex) {
-#ifndef OPENVINO_DISABLE_NPU_FALLBACK
-      bool eligible_for_cpu_fallback = session_context_.device_type.find("NPU") != std::string::npos &&
-                                       !session_context_.so_disable_cpu_ep_fallback &&
-                                       !subgraph_context_.is_ep_ctx_graph;
-      if (eligible_for_cpu_fallback) {
-        std::string exception_str = ex.what();
-        LOGS_DEFAULT(VERBOSE) << exception_str;
-        LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
-                              << "Falling back to OV CPU for execution";
-        session_context_.device_type = "CPU";
-        session_context_.precision = "FP32";
-        try {
-          concrete_backend_ = BackendFactory::MakeBackend(model_proto,
-                                                          session_context_,
-                                                          subgraph_context_,
-                                                          shared_context_,
-                                                          model_stream);
-        } catch (std::string const& msg) {
-          ORT_THROW(msg);
-        }
-      } else
-#endif
-      {
-        throw ex;
-      }
-    }
+    concrete_backend_ = BackendFactory::MakeBackend(model_proto,
+                                                    session_context_,
+                                                    subgraph_context_,
+                                                    shared_context_,
+                                                    model_stream);
   }
   if (session_context_.so_context_enable &&
       (subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) {
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -105,133 +105,133 @@ common::Status OpenVINOExecutionProvider::Compile(
 
   try {
     bool is_epctx_model = false;
-    if (!fused_nodes.empty()) {
-      // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
-      const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
-      session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
-      session_context_.onnx_opset_version =
-          graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
-
-      // OVIR wrapped in epctx should be treated as source but this code does not
-      // This corner case is not in use and will be addressed in a future commit
-      is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
-    }
-
-    // The block below is executed during EP context model inference
-    auto& metadata = shared_context_->shared_weights.metadata;  // Metadata object in memory
-    if (session_context_.so_share_ep_contexts &&
-        is_epctx_model &&
-        metadata.empty()) {
-      fs::path context_model_file_path = session_context_.so_context_file_path;
-      if (context_model_file_path.empty()) {
-        // If ep.context_file_path is not set the input model path is used
-        context_model_file_path = session_context_.onnx_model_path_name;
-      }
+  if (!fused_nodes.empty()) {
+    // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
+    const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
+    session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
+    session_context_.onnx_opset_version =
+        graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
+
+    // OVIR wrapped in epctx should be treated as source but this code does not
+    // This corner case is not in use and will be addressed in a future commit
+    is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
+  }
 
-      // Metadata is always read from model location, this could be a source or epctx model
-      fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
-      fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
-      std::ifstream file(metadata_file_path, std::ios::binary);
-      ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
-      shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
-      file >> metadata;
+  // The block below is executed during EP context model inference
+  auto& metadata = shared_context_->shared_weights.metadata;  // Metadata object in memory
+  if (session_context_.so_share_ep_contexts &&
+      is_epctx_model &&
+      metadata.empty()) {
+    fs::path context_model_file_path = session_context_.so_context_file_path;
+    if (context_model_file_path.empty()) {
+      // If ep.context_file_path is not set the input model path is used
+      context_model_file_path = session_context_.onnx_model_path_name;
     }
 
-    struct OpenVINOEPFunctionState {
-      AllocateFunc allocate_func = nullptr;
-      DestroyFunc destroy_func = nullptr;
-      AllocatorHandle allocator_handle = nullptr;
-      BackendManager& backend_manager;
-    };
+    // Metadata is always read from model location, this could be a source or epctx model
+    fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
+    fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
+    std::ifstream file(metadata_file_path, std::ios::binary);
+    ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
+    shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
+    file >> metadata;
+  }
 
-    for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
-      const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
-      const Node& fused_node = fused_node_graph.fused_node;
-
-      NodeComputeInfo compute_info;
-
-      // During backend creation, we check if user wants to use precompiled blob onnx model or the original model
-      // For precompiled blob, directly load the model instead of compiling the model
-      // For original model, check if the user wants to export a model with pre-compiled blob
-
-      auto& backend_manager = backend_managers_.emplace_back(session_context_,
-                                                             *shared_context_,
-                                                             fused_node,
-                                                             graph_body_viewer,
-                                                             logger,
-                                                             ep_ctx_handle_);
-
-      compute_info.create_state_func =
-          [&backend_manager](ComputeContext* context, FunctionState* state) {
-            OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
-                .allocate_func = context->allocate_func,
-                .destroy_func = context->release_func,
-                .allocator_handle = context->allocator_handle,
-                .backend_manager = backend_manager};
-            *state = static_cast<FunctionState>(p);
-            return 0;
-          };
-
-      compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
-        auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
-        try {
-          function_state->backend_manager.Compute(context);
-        } catch (const std::exception& ex) {
-          return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
-        }
-        return Status::OK();
-      };
+  struct OpenVINOEPFunctionState {
+    AllocateFunc allocate_func = nullptr;
+    DestroyFunc destroy_func = nullptr;
+    AllocatorHandle allocator_handle = nullptr;
+    BackendManager& backend_manager;
+  };
+
+  for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
+    const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
+    const Node& fused_node = fused_node_graph.fused_node;
+
+    NodeComputeInfo compute_info;
+
+    // During backend creation, we check if user wants to use precompiled blob onnx model or the original model
+    // For precompiled blob, directly load the model instead of compiling the model
+    // For original model, check if the user wants to export a model with pre-compiled blob
+
+    auto& backend_manager = backend_managers_.emplace_back(session_context_,
+                                                           *shared_context_,
+                                                           fused_node,
+                                                           graph_body_viewer,
+                                                           logger,
+                                                           ep_ctx_handle_);
+
+    compute_info.create_state_func =
+        [&backend_manager](ComputeContext* context, FunctionState* state) {
+          OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
+              .allocate_func = context->allocate_func,
+              .destroy_func = context->release_func,
+              .allocator_handle = context->allocator_handle,
+              .backend_manager = backend_manager};
+          *state = static_cast<FunctionState>(p);
+          return 0;
+        };
+
+    compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
+      auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
+      try {
+        function_state->backend_manager.Compute(context);
+      } catch (const std::exception& ex) {
+        return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
+      }
+      return Status::OK();
+    };
 
-      compute_info.release_state_func =
-          [](FunctionState state) {
-            if (state) {
-              OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
-              delete function_state;
-            }
-          };
+    compute_info.release_state_func =
+        [](FunctionState state) {
+          if (state) {
+            OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
+            delete function_state;
+          }
+        };
 
-      node_compute_funcs.push_back(std::move(compute_info));
+    node_compute_funcs.push_back(std::move(compute_info));
 
-      if (!status.IsOK()) {
-        break;
-      }
+    if (!status.IsOK()) {
+      break;
     }
+  }
 
-    // The block below is executed during EP context model generation
-    if (session_context_.so_context_enable &&
-        session_context_.so_share_ep_contexts &&
-        !metadata.empty()) {
-      // For models after the first the metadata name comes from the shared context
-      fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
+  // The block below is executed during EP context model generation
+  if (session_context_.so_context_enable &&
+      session_context_.so_share_ep_contexts &&
+      !metadata.empty()) {
+    // For models after the first the metadata name comes from the shared context
+    fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
+    if (metadata_file_path.empty()) {
+      metadata_file_path = session_context_.so_context_file_path;
+      std::string name_append{"_metadata.bin"};
       if (metadata_file_path.empty()) {
-        metadata_file_path = session_context_.so_context_file_path;
-        std::string name_append{"_metadata.bin"};
-        if (metadata_file_path.empty()) {
-          metadata_file_path = session_context_.onnx_model_path_name;
-          name_append = "_ctx" + name_append;
-        }
-        auto metadata_filename = metadata_file_path.stem().string() + name_append;
-        metadata_file_path.replace_filename(metadata_filename);
-        shared_context_->shared_weights.metadata_filepath = metadata_file_path;
+        metadata_file_path = session_context_.onnx_model_path_name;
+        name_append = "_ctx" + name_append;
       }
-
-      // Metadata is generated only for shared contexts
-      // If saving metadata then save it to the provided path or use the original model path
-      // Multiple calls to Compile() will update the metadata and for the last call
-      //   the resulting file will contain the aggregated content
-      std::ofstream file{metadata_file_path, std::ios::binary};
-      ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
-      file << metadata;
+      auto metadata_filename = metadata_file_path.stem().string() + name_append;
+      metadata_file_path.replace_filename(metadata_filename);
+      shared_context_->shared_weights.metadata_filepath = metadata_file_path;
     }
-  } catch (const ovep_exception& ex) {
-    status = ex;
+
+    // Metadata is generated only for shared contexts
+    // If saving metadata then save it to the provided path or use the original model path
+    // Multiple calls to Compile() will update the metadata and for the last call
+    //   the resulting file will contain the aggregated content
+    std::ofstream file{metadata_file_path, std::ios::binary};
+    ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
+    file << metadata;
   }
 
   if (session_context_.so_stop_share_ep_contexts) {
     if (shared_context_) {
       shared_context_->clear();
     }
   }
+  } catch (const ovep_exception& ex) {
+    status = ex;
+  }
 
   return status;
 }
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc