Commit 7f90473

Merge branch 'master' into sync_msft_03092025
2 parents 569848c + 5537d33

13 files changed: +430 -48 lines


include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h

Lines changed: 1 addition & 0 deletions
@@ -34,6 +34,7 @@ constexpr const char* kProfilesOptShapes = "nv_profile_opt_shapes";
 constexpr const char* kCudaGraphEnable = "enable_cuda_graph";
 constexpr const char* kMultiProfileEnable = "nv_multi_profile_enable";
 constexpr const char* kUseExternalDataInitializer = "nv_use_external_data_initializer";
+constexpr const char* kRuntimeCacheFile = "nv_runtime_cache_path";

 }  // namespace provider_option_names

 namespace run_option_names {
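This commit introduces an "nv_runtime_cache_path" provider option that points the EP at a directory for TensorRT RTX runtime caches. Below is a minimal sketch of how an application might pass it when registering the provider; the provider registration name "NvTensorRtRtx", the cache directory, and the model path are illustrative assumptions, not taken from this commit.

#include <onnxruntime_cxx_api.h>

#include <string>
#include <unordered_map>

int main() {
  Ort::Env env;
  Ort::SessionOptions session_options;

  // Keys match the constants in nv_provider_options.h; values are illustrative.
  std::unordered_map<std::string, std::string> nv_options{
      {"nv_runtime_cache_path", "./trt_rtx_runtime_cache"},  // new option added by this commit
      {"enable_cuda_graph", "0"},
  };
  // Assumed provider name string; consult the EP documentation for the exact value.
  session_options.AppendExecutionProvider("NvTensorRtRtx", nv_options);

  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
  return 0;
}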

js/web/docs/webnn-operators.md

Lines changed: 1 addition & 0 deletions
@@ -32,6 +32,7 @@ platforms. Check the [WebNN status](https://webmachinelearning.github.io/webnn-s
 | Div | ai.onnx(7-12, 13, 14+) | div | |
 | DequantizeLinear | ai.onnx(10-12, 13-18, 19-20, 21-22, 23+) | dequantizeLinear | The shape of x_scale should be a subsample of the shape of input |
 | Dropout | ai.onnx(7-9, 10-11, 12, 13-21, 22+) | identity | Only supports test mode |
+| DynamicQuantizeLinear | ai.onnx(11+) | cast, clamp, div, div, max, min, quantizeLinear, reduceMax, reduceMin, reshape, roundEven, sub | |
 | Einsum | ai.onnx(12+) | reshape, transpose, matmul, reduceSum, mul, triangular | |
 | Elu | ai.onnx(7+) | elu | |
 | Equal | ai.onnx(7-10, 11-12, 13-18, 19+) | equal | |
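DynamicQuantizeLinear has no single WebNN counterpart, so it is decomposed into the primitives listed above. As a reference for what that decomposition computes, here is a scalar C++ sketch of the ONNX DynamicQuantizeLinear definition (uint8, zero included in the range, round-to-nearest-even). It is illustrative only and not code from this commit.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar reference for ONNX DynamicQuantizeLinear (uint8). The WebNN decomposition
// above expresses the same computation on tensors with reduceMin/reduceMax, div,
// roundEven, clamp, sub and quantizeLinear. Assumes x is non-empty.
void DynamicQuantizeLinearRef(const std::vector<float>& x, std::vector<uint8_t>& y,
                              float& y_scale, uint8_t& y_zero_point) {
  const float qmin = 0.0f, qmax = 255.0f;
  // Include 0 in the observed range so zero stays exactly representable.
  float x_min = std::min(0.0f, *std::min_element(x.begin(), x.end()));
  float x_max = std::max(0.0f, *std::max_element(x.begin(), x.end()));
  y_scale = (x_max - x_min) / (qmax - qmin);
  if (y_scale == 0.0f) y_scale = 1.0f;  // guard for an all-zero input
  // nearbyint rounds ties to even, matching WebNN's roundEven.
  float zp = std::nearbyint(qmin - x_min / y_scale);
  y_zero_point = static_cast<uint8_t>(std::clamp(zp, qmin, qmax));
  y.resize(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    float q = std::nearbyint(x[i] / y_scale) + static_cast<float>(y_zero_point);
    y[i] = static_cast<uint8_t>(std::clamp(q, qmin, qmax));
  }
}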

onnxruntime/core/mlas/lib/mlasi.h

Lines changed: 1 addition & 1 deletion
@@ -2280,7 +2280,7 @@ MLAS_FLOAT32X4
 MlasMultiplyAddFloat32x4(MLAS_FLOAT32X4 Vector1, MLAS_FLOAT32X4 Vector2, MLAS_FLOAT32X4 Vector3)
 {
 #if defined(MLAS_NEON_INTRINSICS)
-    return vmlaq_f32(Vector3, Vector1, Vector2);
+    return vfmaq_f32(Vector3, Vector1, Vector2);
 #elif defined(MLAS_FMA3_INTRINSICS)
     return _mm_fmadd_ps(Vector1, Vector2, Vector3);
 #elif defined(MLAS_SSE2_INTRINSICS)
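The one-line change swaps vmlaq_f32, which compilers may lower to a separate multiply and add, for vfmaq_f32, a fused multiply-add that rounds only once. A small standalone sketch of the difference follows; the NEON path is guarded so the file also builds on non-ARM targets, and the helper names are illustrative, not MLAS code.

#include <cmath>
#include <cstdio>

#if defined(__aarch64__)
#include <arm_neon.h>

// Fused: one rounding for (v1 * v2 + v3), which is what vfmaq_f32 computes.
static float32x4_t MulAddFused(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v3, v1, v2);  // v3 + v1 * v2, single rounding step
}
#endif

// Portable scalar equivalent: std::fma also rounds only once.
static float MulAddScalar(float a, float b, float c) {
  return std::fma(a, b, c);
}

int main() {
  // With a fused multiply-add the intermediate product is not rounded,
  // which can change the last bit compared to computing (a * b) + c.
  float a = 1.0000001f, b = 1.0000001f, c = -1.0000002f;
  std::printf("separate: %.9g\n", (a * b) + c);
  std::printf("fused:    %.9g\n", MulAddScalar(a, b, c));
  return 0;
}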

onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc

Lines changed: 73 additions & 14 deletions
@@ -3,6 +3,7 @@
 // Licensed under the MIT License.
 #include <fstream>
 #include <list>
+#include <thread>
 #include <unordered_set>
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/nv_tensorrt_rtx/nv_provider_options.h"
@@ -654,9 +655,9 @@ void NvExecutionProvider::PerThreadContext::ResetTensorRTContext(std::string fus
   }
 }

-bool NvExecutionProvider::PerThreadContext::UpdateTensorRTContext(std::string fused_node, std::unique_ptr<nvinfer1::IExecutionContext> context) {
+bool NvExecutionProvider::PerThreadContext::UpdateTensorRTContext(std::string fused_node, tensorrt_ptr::unique_pointer_exec_ctx context) {
   if (!context) {
-    context = std::make_unique<nvinfer1::IExecutionContext>();
+    context = tensorrt_ptr::unique_pointer_exec_ctx();
   }
   trt_context_map_[fused_node] = std::move(context);
@@ -757,11 +758,11 @@ bool NvExecutionProvider::PerThreadContext::IsTensorRTContextInMap(std::string f
 nvinfer1::IExecutionContext& NvExecutionProvider::PerThreadContext::GetTensorRTContext(std::string fused_node) {
   auto it = trt_context_map_.find(fused_node);
   if (it != trt_context_map_.end()) {
-    return *(it->second);  // dereference shared pointer
+    return *(it->second.get());  // dereference shared pointer
   }
-  auto context = std::make_unique<nvinfer1::IExecutionContext>();
+  auto context = tensorrt_ptr::unique_pointer_exec_ctx();
   trt_context_map_[fused_node] = std::move(context);
-  return *(trt_context_map_[fused_node]);  // dereference shared pointer
+  return *(trt_context_map_[fused_node].get());  // dereference shared pointer
 }

 void NvExecutionProvider::ReleasePerThreadContext() const {
@@ -870,6 +871,20 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
   max_shared_mem_size_ = info.max_shared_mem_size;
   dump_subgraphs_ = info.dump_subgraphs;
   weight_stripped_engine_enable_ = info.weight_stripped_engine_enable;
+  // make runtime cache path absolute and create directory if it doesn't exist
+  if (!info.runtime_cache_path.empty()) {
+    std::filesystem::path p(info.runtime_cache_path);
+    std::filesystem::path abs_path = std::filesystem::absolute(p);
+    const auto& env = GetDefaultEnv();
+    auto status = env.CreateFolder(abs_path.string());
+    if (!status.IsOK()) {
+      LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] The runtime cache directory could not be created at: " << abs_path
+                            << ". Runtime cache is disabled.";
+    } else {
+      runtime_cache_ = abs_path;
+    }
+  }
+
   onnx_model_folder_path_ = info.onnx_model_folder_path;
   onnx_model_bytestream_ = info.onnx_bytestream;
   onnx_model_bytestream_size_ = info.onnx_bytestream_size;
@@ -1053,7 +1068,13 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
                         << ", nv_onnx_model_bytestream_size_: " << onnx_model_bytestream_size_
                         << ", nv_onnx_external_bytestream_size_: " << onnx_external_data_bytestream_size_
                         << ", nv_use_external_data_initializer_: " << use_external_data_initializer_
-                        << ", nv_op_types_to_exclude: " << op_types_to_exclude_;
+                        << ", nv_op_types_to_exclude: " << op_types_to_exclude_
+                        << ", nv_runtime_cache_path: " << runtime_cache_;
+}
+
+Status NvExecutionProvider::Sync() const {
+  CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(stream_));
+  return Status::OK();
 }

 NvExecutionProvider::~NvExecutionProvider() {
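The new Sync() override simply blocks on the provider's CUDA stream. CUDA_RETURN_IF_ERROR is ORT's internal status-returning check; the sketch below spells out the same idea with plain CUDA runtime calls and a hypothetical macro name, for illustration only.

#include <cstdio>
#include <cuda_runtime.h>

// Illustrative stand-in for a "return if the CUDA call failed" check.
// ORT's CUDA_RETURN_IF_ERROR wraps the same idea but returns its Status type.
#define MY_CUDA_RETURN_IF_ERROR(expr)                                 \
  do {                                                                \
    cudaError_t _err = (expr);                                        \
    if (_err != cudaSuccess) {                                        \
      std::fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(_err)); \
      return _err;                                                    \
    }                                                                 \
  } while (0)

// Hypothetical helper: block until all work queued on `stream` has finished.
cudaError_t SyncStream(cudaStream_t stream) {
  MY_CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(stream));
  return cudaSuccess;
}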
@@ -1574,8 +1595,8 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
         // the initializer was marked as external data by the ORT graph at load time since it was provided in memory
         size_t size = 0;
         const void* ptr = nullptr;
-        c_api.GetTensorSizeInBytes(&initializer_value, &size);
-        c_api.GetTensorData(&initializer_value, &ptr);
+        Ort::ThrowOnError(c_api.GetTensorSizeInBytes(&initializer_value, &size));
+        Ort::ThrowOnError(c_api.GetTensorData(&initializer_value, &ptr));
         userWeights.emplace_back(tp->name(), ptr, size);
       } else if (utils::HasExternalDataInMemory(*tp)) {
         // only copy and take ownership of the data if none of the above conditions are met
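Previously the OrtStatus* returned by these C API calls was dropped; wrapping them in Ort::ThrowOnError surfaces failures as C++ exceptions. Below is a self-contained sketch of the same pattern against the public API; the tensor is created only so there is an OrtValue to query, it is not the EP's initializer handling.

#include <onnxruntime_cxx_api.h>

#include <array>
#include <iostream>

int main() {
  Ort::Env env;  // initializes the ORT runtime for the process
  const OrtApi& c_api = Ort::GetApi();

  // Build a small CPU tensor over a user-owned buffer with the C++ API.
  std::array<float, 4> data{1.f, 2.f, 3.f, 4.f};
  std::array<int64_t, 2> shape{2, 2};
  Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  Ort::Value value = Ort::Value::CreateTensor<float>(mem_info, data.data(), data.size(),
                                                     shape.data(), shape.size());

  // Same pattern as the diff: C API calls return OrtStatus*, and
  // Ort::ThrowOnError converts a non-null status into an Ort::Exception.
  size_t size_in_bytes = 0;
  const void* ptr = nullptr;
  Ort::ThrowOnError(c_api.GetTensorSizeInBytes(value, &size_in_bytes));
  Ort::ThrowOnError(c_api.GetTensorData(value, &ptr));

  std::cout << "tensor occupies " << size_in_bytes << " bytes at " << ptr << "\n";
  return 0;
}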
@@ -2394,8 +2415,8 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
         // the initializer was marked as external data by the ORT graph at load time since it was provided in memory
         size_t size = 0;
         const void* ptr = nullptr;
-        c_api.GetTensorSizeInBytes(&initializer_value, &size);
-        c_api.GetTensorData(&initializer_value, &ptr);
+        Ort::ThrowOnError(c_api.GetTensorSizeInBytes(&initializer_value, &size));
+        Ort::ThrowOnError(c_api.GetTensorData(&initializer_value, &ptr));
         userWeights.emplace_back(tp->name(), ptr, size);
       } else if (utils::HasExternalDataInMemory(*tp)) {
         // only copy and take ownership of the data if none of the above conditions are met
@@ -2631,8 +2652,10 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
   //
   // Otherwise engine will be handled at inference time.
   std::unique_ptr<nvinfer1::ICudaEngine> trt_engine;
-  std::unique_ptr<nvinfer1::IExecutionContext> trt_context;
+  tensorrt_ptr::unique_pointer_exec_ctx trt_context;
+  std::unique_ptr<nvinfer1::IRuntimeCache> trt_runtime_cache;
   std::unique_ptr<nvinfer1::IRuntimeConfig> trt_runtime_config;
+  std::string runtime_cache_file = "";

   // Generate file name for dumping ep context model
   if (dump_ep_context_model_ && ctx_model_path_.empty()) {
@@ -2661,6 +2684,18 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
       trt_runtime_config->setDynamicShapesKernelSpecializationStrategy(nvinfer1::DynamicShapesKernelSpecializationStrategy::kEAGER);
     }
     trt_runtime_config->setExecutionContextAllocationStrategy(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED);
+    if (!runtime_cache_.empty()) {
+      runtime_cache_file = (runtime_cache_ / fused_node.Name()).string();
+      trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+      auto cache_data = file_utils::ReadFile(runtime_cache_file);
+      if (!trt_runtime_cache->deserialize(cache_data.data(), cache_data.size())) {
+        trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+        LOGS_DEFAULT(INFO) << "TensorRT RTX failed to deserialize the runtime cache, will overwrite with new one" << std::endl;
+      }
+      if (!trt_runtime_config->setRuntimeCache(*trt_runtime_cache)) {
+        LOGS_DEFAULT(INFO) << "TensorRT RTX failed to set the runtime cache" << std::endl;
+      }
+    }

     if (detailed_build_log_) {
       auto engine_build_stop = std::chrono::steady_clock::now();
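file_utils::ReadFile and file_utils::WriteFile come from the new nv_file_utils.h header added in this change. Their exact signatures are not shown in this diff, so the helpers below are a hypothetical sketch of the whole-file binary read and write that such a cache round trip needs; an empty buffer on the first run simply makes deserialize() fail, after which a fresh cache is created.

#include <cstddef>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Hypothetical stand-ins for the EP's file helpers, for illustration only.
std::vector<char> ReadAllBytes(const std::string& path) {
  std::ifstream in(path, std::ios::binary);
  if (!in) return {};  // no cache file yet: caller falls back to a fresh cache
  return std::vector<char>(std::istreambuf_iterator<char>(in),
                           std::istreambuf_iterator<char>());
}

void WriteAllBytes(const std::string& path, const void* data, size_t size) {
  std::ofstream out(path, std::ios::binary | std::ios::trunc);
  out.write(static_cast<const char*>(data), static_cast<std::streamsize>(size));
}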
@@ -2721,7 +2756,9 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
     // Build context
     // Note: Creating an execution context from an engine is thread safe per TRT doc
     // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
-    trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(trt_runtime_config.get()));
+    trt_context = tensorrt_ptr::unique_pointer_exec_ctx(
+        trt_engine->createExecutionContext(trt_runtime_config.get()),
+        tensorrt_ptr::IExecutionContextDeleter(runtime_cache_file, std::move(trt_runtime_cache)));
     if (!trt_context) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                              "NvTensorRTRTX EP could not build execution context for fused node: " + fused_node.Name());
@@ -3002,7 +3039,7 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const Gra
                                                                        std::unordered_map<std::string, size_t>& output_map,
                                                                        std::vector<NodeComputeInfo>& node_compute_funcs) {
   std::unique_ptr<nvinfer1::ICudaEngine> trt_engine;
-  std::unique_ptr<nvinfer1::IExecutionContext> trt_context;
+  tensorrt_ptr::unique_pointer_exec_ctx trt_context;
   std::unordered_map<std::string, size_t> input_indexes;   // TRT engine input name -> ORT kernel context input index
   std::unordered_map<std::string, size_t> output_indexes;  // TRT engine output name -> ORT kernel context output index
   std::unordered_map<std::string, size_t> output_types;    // TRT engine output name -> ORT output tensor type
@@ -3024,11 +3061,33 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const Gra
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage());
   }

+  std::unique_ptr<nvinfer1::IRuntimeCache> trt_runtime_cache;
+  auto trt_runtime_config = std::unique_ptr<nvinfer1::IRuntimeConfig>(trt_engine->createRuntimeConfig());
+  if (trt_runtime_config && cuda_graph_enable_) {
+    trt_runtime_config->setDynamicShapesKernelSpecializationStrategy(nvinfer1::DynamicShapesKernelSpecializationStrategy::kEAGER);
+  }
+  trt_runtime_config->setExecutionContextAllocationStrategy(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED);
+  std::string runtime_cache_file = "";
+  if (!runtime_cache_.empty()) {
+    runtime_cache_file = (runtime_cache_ / graph_body_viewer.GetNode(node_idx)->Name()).string();
+    trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+    auto cache_data = file_utils::ReadFile(runtime_cache_file);
+    if (!trt_runtime_cache->deserialize(cache_data.data(), cache_data.size())) {
+      trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to deserialize the runtime cache, will overwrite with new one" << std::endl;
+    }
+    if (!trt_runtime_config->setRuntimeCache(*trt_runtime_cache)) {
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to set the runtime cache" << std::endl;
+    }
+  }
+
   // Build context
   //
   // Note: Creating an execution context from an engine is thread safe per TRT doc
   // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
-  trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
+  trt_context = tensorrt_ptr::unique_pointer_exec_ctx(
+      trt_engine->createExecutionContext(trt_runtime_config.get()),
+      tensorrt_ptr::IExecutionContextDeleter(runtime_cache_file, std::move(trt_runtime_cache)));
   if (!trt_context) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                            "NvTensorRTRTX EP could not build execution context for fused node: " + fused_node.Name());

onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h

Lines changed: 29 additions & 5 deletions
@@ -16,6 +16,7 @@ typedef void* cudnnStatus_t;
 #include <mutex>
 #include "core/providers/cuda/cuda_graph.h"
 #include "nv_execution_provider_info.h"
+#include "core/providers/nv_tensorrt_rtx/nv_file_utils.h"

 namespace onnxruntime {

@@ -58,6 +59,26 @@ class TensorrtLogger : public nvinfer1::ILogger {
 };

 namespace tensorrt_ptr {
+/*
+ * custom deleter that will dump the optimized runtime cache when the execution context is destructed
+ */
+struct IExecutionContextDeleter {
+  IExecutionContextDeleter() = default;
+  IExecutionContextDeleter(const std::string& runtime_cache_path, std::unique_ptr<nvinfer1::IRuntimeCache>&& runtime_cache) : runtime_cache_path_(runtime_cache_path), runtime_cache_(std::move(runtime_cache)) {};
+  void operator()(nvinfer1::IExecutionContext* context) {
+    if (context != nullptr) {
+      if (!runtime_cache_path_.empty()) {
+        auto serialized_cache_data = std::unique_ptr<nvinfer1::IHostMemory>(runtime_cache_->serialize());
+        file_utils::WriteFile(runtime_cache_path_, serialized_cache_data->data(), serialized_cache_data->size());
+      }
+      delete context;
+    }
+  }
+
+ private:
+  std::string runtime_cache_path_;
+  std::unique_ptr<nvinfer1::IRuntimeCache> runtime_cache_;
+};

 struct TensorrtInferDeleter {
   template <typename T>
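IExecutionContextDeleter is a stateful deleter: it keeps ownership of the IRuntimeCache and, when the execution context is destroyed, serializes that cache to the configured path before deleting the context. The sketch below shows the same unique_ptr-with-stateful-deleter pattern using plain stand-in types; all names are illustrative and nothing here is TensorRT API.

#include <cstdio>
#include <memory>
#include <string>

// Stand-in for a resource whose state should be persisted when its owner goes away.
struct FakeCache {
  std::string bytes = "optimized-kernels";
};
struct FakeContext {};

// Stateful deleter, analogous to IExecutionContextDeleter above:
// persist the cache first, then release the context.
struct PersistingDeleter {
  std::string path;
  std::unique_ptr<FakeCache> cache;

  void operator()(FakeContext* ctx) {
    if (ctx != nullptr) {
      if (!path.empty() && cache) {
        std::FILE* f = std::fopen(path.c_str(), "wb");
        if (f) {
          std::fwrite(cache->bytes.data(), 1, cache->bytes.size(), f);
          std::fclose(f);
        }
      }
      delete ctx;
    }
  }
};

using ContextPtr = std::unique_ptr<FakeContext, PersistingDeleter>;

int main() {
  ContextPtr ctx(new FakeContext(), PersistingDeleter{"cache.bin", std::make_unique<FakeCache>()});
  // ... use ctx ...
}  // destruction writes cache.bin, then frees the context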
@@ -70,6 +91,7 @@ struct TensorrtInferDeleter {

 template <typename T>
 using unique_pointer = std::unique_ptr<T, TensorrtInferDeleter>;
+using unique_pointer_exec_ctx = std::unique_ptr<nvinfer1::IExecutionContext, IExecutionContextDeleter>;
 };  // namespace tensorrt_ptr

 //
@@ -196,7 +218,7 @@ struct TensorrtFuncState {
   std::string fused_node_name;
   nvinfer1::IBuilder* builder;
   std::unique_ptr<nvinfer1::ICudaEngine>* engine = nullptr;
-  std::unique_ptr<nvinfer1::IExecutionContext>* context = nullptr;
+  tensorrt_ptr::unique_pointer_exec_ctx* context = nullptr;
   std::unique_ptr<nvinfer1::INetworkDefinition>* network = nullptr;
   std::vector<std::unordered_map<std::string, size_t>> input_info;
   std::vector<std::unordered_map<std::string, size_t>> output_info;
@@ -233,7 +255,7 @@ struct TensorrtShortFuncState {
   AllocatorHandle allocator = nullptr;
   std::string fused_node_name;
   std::unique_ptr<nvinfer1::ICudaEngine>* engine = nullptr;
-  std::unique_ptr<nvinfer1::IExecutionContext>* context = nullptr;
+  tensorrt_ptr::unique_pointer_exec_ctx* context = nullptr;
   std::vector<std::unordered_map<std::string, size_t>> input_info;
   std::vector<std::unordered_map<std::string, size_t>> output_info;
   std::mutex* tensorrt_mu_ptr = nullptr;
@@ -285,6 +307,7 @@ class NvExecutionProvider : public IExecutionProvider {
                 IResourceAccountant* /* resource_accountant */) const override;

   int GetDeviceId() const { return device_id_; }
+  Status Sync() const;

   common::Status Compile(const std::vector<FusedNodeAndGraph>& fused_nodes_and_graphs,
                          std::vector<NodeComputeInfo>& node_compute_funcs) override;
@@ -356,6 +379,7 @@ class NvExecutionProvider : public IExecutionProvider {
   bool detailed_build_log_ = false;
   bool cuda_graph_enable_ = false;
   bool multi_profile_enable_ = false;
+  std::filesystem::path runtime_cache_;
   std::string cache_prefix_;
   std::string op_types_to_exclude_;
   int nv_profile_index_ = 0;
@@ -386,7 +410,7 @@ class NvExecutionProvider : public IExecutionProvider {
   // But there are still some thread safe operations, please see here https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
   // For those non thread safe operations, TRT EP uses (1) lock_guard or (2) PerThreadContext to make sure synchronization.
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::ICudaEngine>> engines_;
-  std::unordered_map<std::string, std::unique_ptr<nvinfer1::IExecutionContext>> contexts_;
+  std::unordered_map<std::string, tensorrt_ptr::unique_pointer_exec_ctx> contexts_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::IBuilder>> builders_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::INetworkDefinition>> networks_;
   std::unordered_map<std::string, std::vector<std::unordered_map<std::string, size_t>>> input_info_;
@@ -424,7 +448,7 @@ class NvExecutionProvider : public IExecutionProvider {

   bool IsTensorRTContextInMap(std::string fused_node);
   nvinfer1::IExecutionContext& GetTensorRTContext(std::string fused_node);
-  bool UpdateTensorRTContext(std::string fused_node, std::unique_ptr<nvinfer1::IExecutionContext> context);
+  bool UpdateTensorRTContext(std::string fused_node, tensorrt_ptr::unique_pointer_exec_ctx context);
   void ResetTensorRTContext(std::string fused_node);

   // CUDA Graph management
@@ -454,7 +478,7 @@ class NvExecutionProvider : public IExecutionProvider {
   // See more details here:
   // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
   // https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_execution_context.html#a63cd95430852038ce864e17c670e0b36
-  std::unordered_map<std::string, std::unique_ptr<nvinfer1::IExecutionContext>> trt_context_map_;
+  std::unordered_map<std::string, tensorrt_ptr::unique_pointer_exec_ctx> trt_context_map_;

   // The profile shape ranges for the engine that the execution context maintained by the PerThreadContext is built with.
   // TRT EP needs this info to determine whether to rebuild the execution context.

onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc

Lines changed: 3 additions & 1 deletion
@@ -51,6 +51,7 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
           .AddAssignmentToReference(nv::provider_option_names::kCudaGraphEnable, info.cuda_graph_enable)
           .AddAssignmentToReference(nv::provider_option_names::kUseExternalDataInitializer, info.use_external_data_initializer)
           .AddAssignmentToReference(nv::provider_option_names::kMultiProfileEnable, info.multi_profile_enable)
+          .AddAssignmentToReference(nv::provider_option_names::kRuntimeCacheFile, info.runtime_cache_path)
           .Parse(options));  // add new provider option here.

   info.user_compute_stream = user_compute_stream;
@@ -105,7 +106,8 @@ ProviderOptions NvExecutionProviderInfo::ToProviderOptions(const NvExecutionProv
       {nv::provider_option_names::kProfilesMaxShapes, MakeStringWithClassicLocale(info.profile_max_shapes)},
       {nv::provider_option_names::kProfilesOptShapes, MakeStringWithClassicLocale(info.profile_opt_shapes)},
       {nv::provider_option_names::kCudaGraphEnable, MakeStringWithClassicLocale(info.cuda_graph_enable)},
-      {nv::provider_option_names::kUseExternalDataInitializer, MakeStringWithClassicLocale(info.use_external_data_initializer)}};
+      {nv::provider_option_names::kUseExternalDataInitializer, MakeStringWithClassicLocale(info.use_external_data_initializer)},
+      {nv::provider_option_names::kRuntimeCacheFile, MakeStringWithClassicLocale(info.runtime_cache_path)}};
   return options;
 }
 }  // namespace onnxruntime

onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ struct NvExecutionProviderInfo {
   bool engine_decryption_enable{false};
   std::string engine_decryption_lib_path{""};
   bool force_sequential_engine_build{false};
-  std::string timing_cache_path{""};
+  std::string runtime_cache_path{""};
   bool detailed_build_log{false};
   bool sparsity_enable{false};
   int auxiliary_streams{-1};
