intel · Kotomi-Du · Sep 2, 2025 · Sep 23, 2025
diff --git a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h
@@ -6,7 +6,7 @@
 #ifdef DEBUG_GENERATION
 #define DUMP_TENSOR_LEVEL 2
 #else
-#define DUMP_TENSOR_LEVEL 0  // change it to 1 or 2 if want to enable dumping for code not in generation.
+#define DUMP_TENSOR_LEVEL 1  // set to 0 to disable, or to 1 or 2 to enable, dumping for code not in generation.
 #endif
 
 #define DUMP_CPU_TENSOR_LEVEL DUMP_TENSOR_LEVEL

diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -57,6 +57,7 @@ struct OnnxToOvNetworkBindings {
       "beam_idx",
       "past_key_values",
       "present",
+       "total_seq_len",
   };
 
   OnnxToOvNetworkBindings(OVExeNetwork& exec_network, SubGraphContext& subgraph_context, SessionContext& session_context) {
@@ -72,15 +73,15 @@ struct OnnxToOvNetworkBindings {
         // stateful representation has introduced these new tensors, creating a name mismatch (matched_names=false).
         // So, if there is a name mismatch, or the name matches our special io list, we simply continue processing
         // here to prevent runtime exceptions.
-        if (session_context.enable_causallm) {
+        // NOTE(review): guard disabled, so the check below runs regardless of enable_causallm. Was: if (session_context.enable_causallm) {
           if (!matched_names ||
               std::any_of(special_io_names_.begin(), special_io_names_.end(),
                           [&onnx_name](const std::string& name) { return onnx_name.find(name) != std::string::npos; })) {
             // This case also requires dynamic shape inference, so we'll mark the bindings as dynamic.
             has_dynamic_io_ = true;
             continue;
           }
-        }
+        // } -- closing brace of the disabled enable_causallm guard
 
         ORT_ENFORCE(matched_names, log_tag,
                     "Input names mismatch between OpenVINO and ONNX. ", onnx_name,

diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -96,6 +96,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Atanh", V_2020_4, {"CPU"}},
     {"Atanh", V_2022_1, {"GPU"}},
     {"Attention", V_2023_0, {"CPU", "GPU"}},
+    {"GroupQueryAttention", V_2023_0, {"CPU", "GPU"}},
     {"AveragePool", V_2020_4, {"CPU", "GPU"}},
     {"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
     {"BiasGelu", V_2023_0, {"CPU", "GPU"}},

diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
@@ -386,6 +386,9 @@ def generate_build_tree(
             "-Donnxruntime_ENABLE_PIX_FOR_WEBGPU_EP=" + ("ON" if args.enable_pix_capture else "OFF"),
         ]
 
+        if 1: #args.dump_node_input_output:
+            cmake_args.append("-Donnxruntime_DEBUG_NODE_INPUTS_OUTPUTS=ON")
+
         if args.caller_framework:
             cmake_args.append("-Donnxruntime_CALLER_FRAMEWORK=" + args.caller_framework)
         if args.winml_root_namespace_override:

diff --git a/tools/ci_build/build_args.py b/tools/ci_build/build_args.py
@@ -842,6 +842,12 @@ def convert_arg_line_to_args(self, arg_line: str) -> list[str]:  # Use list[str]
         fromfile_prefix_chars="@",  # Allow args from file (@filename)
     )
 
+    # Optional string argument: directory for dumping node input/output data (see help text).
+    parser.add_argument(
+        "--dump_node_input_output",
+        type=str,
+        help="Dump node input/output data to files in the specified directory.",
+    )
     # Add arguments by category
     add_core_build_args(parser)
     add_cmake_build_config_args(parser)