diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 3426a2781bbc6..ad37f7ed751ce 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -321,7 +321,7 @@ static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
   }
   return false;
 }
-
+#if ((OPENVINO_VERSION_MAJOR < 2025) || ((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR < 4)))
 static bool Is16BitTensor(const onnxruntime::NodeArg* node_arg) {
   const auto* type_proto = node_arg ? node_arg->TypeAsProto() : nullptr;
   return type_proto && type_proto->has_tensor_type() &&
@@ -359,7 +359,7 @@ static bool IsQDQGraphWithUint16OrInt16(const onnxruntime::GraphViewer& graph_vi
   }
   return false;
 }
-
+#endif
 static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& onnx_model_path_name,
                                 [[maybe_unused]] ONNX_NAMESPACE::ModelProto* model_proto,
                                 [[maybe_unused]] const onnxruntime::Node& fused_node) {
@@ -494,7 +494,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
 
   // Check if the graph is QDQ and has int16 or uint16 quantization
   // If so, we will apply the QDQ scales fix transformation (for GPU device only)
-  bool is_qdq_graph_uint16_or_int16 = IsQDQGraphWithUint16OrInt16(subgraph);
+  //bool is_qdq_graph_uint16_or_int16 = IsQDQGraphWithUint16OrInt16(subgraph);
 
   const auto& onnx_model_path_name = subgraph.ModelPath();
   // QDQ stripping enabled only for the NPU and experimentally on the GPU
@@ -508,7 +508,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
-  } else if ((session_context_.device_type.find("GPU") != std::string::npos) &&
+  }
+#if ((OPENVINO_VERSION_MAJOR < 2025) || ((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR < 4)))
+  else if ((session_context_.device_type.find("GPU") != std::string::npos) &&
              is_qdq_graph_uint16_or_int16) {
     // Create a copy of the model
     std::unique_ptr<onnxruntime::Model> model;
@@ -519,7 +521,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
-  } else {
+  }
+#endif
+  else {
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
 
     // scan ext initializers:
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index 45e518d16686e..079d92ae82eb0 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -60,7 +60,7 @@ CreateOVModel(std::string&& model,
   // Check for Constant Folding
   if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
     ov::pass::ConstantFolding pass_const_obj;
-    pass_const_obj.run_on_model(ov_model);
+    //pass_const_obj.run_on_model(ov_model);
     auto& results = const_cast<ov::ResultVector&>(ov_model.get()->get_results());
     size_t index = results.size() - 1;
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 373b2121a9b60..dd9d9c938a10b 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -96,7 +96,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Atanh", V_2020_4, {"CPU"}},
     {"Atanh", V_2022_1, {"GPU"}},
     {"Attention", V_2023_0, {"CPU", "GPU"}},
-    {"GroupQueryAttention", V_2025_1, {"GPU"}},
+    //{"GroupQueryAttention", V_2025_1, {"GPU"}},
     {"AveragePool", V_2020_4, {"CPU", "GPU"}},
     {"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
     {"BiasGelu", V_2023_0, {"CPU", "GPU"}},
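Note on the guard added in backend_manager.cc: it is a compile-time version gate, so the uint16/int16 QDQ helpers and the GPU scales-fix branch are only compiled when building against an OpenVINO release strictly older than 2025.4; on 2025.4 and newer the preprocessor drops that code entirely. Below is a minimal standalone sketch of the same pattern, assuming the OPENVINO_VERSION_MAJOR/OPENVINO_VERSION_MINOR macros that the OpenVINO headers normally define (the fallback defines here are placeholders for illustration only):

#include <cstdio>

// Placeholder values so the sketch compiles on its own; real builds get
// these macros from the OpenVINO headers instead.
#ifndef OPENVINO_VERSION_MAJOR
#define OPENVINO_VERSION_MAJOR 2025
#define OPENVINO_VERSION_MINOR 4
#endif

int main() {
#if ((OPENVINO_VERSION_MAJOR < 2025) || ((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR < 4)))
  // Pre-2025.4: the gated code (like the QDQ scales-fix path in the patch)
  // is compiled in.
  std::puts("legacy path compiled in");
#else
  // 2025.4 and newer: the gated code never reaches the compiler.
  std::puts("gated code compiled out");
#endif
  return 0;
}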