[TensorRT EP] Use TRT/CUDA/ORT version from runtime instead of build time to generate hash value #22921

Status: Open · wants to merge 1 commit into main
onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc:

```diff
@@ -1726,8 +1726,12 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProviderInfo& info)
   }

   trt_version_ = getInferLibVersion();
+  CUDA_CALL_THROW(cudaRuntimeGetVersion(&cuda_version_));
+  ort_version_ = Ort::GetVersionString();

   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT version is " << trt_version_;
+  LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] CUDA version is " << cuda_version_;
+  LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] ORT version is " << ort_version_;

   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT provider options: "
                         << "device_id: " << device_id_
```

(Note: the ORT-version log line originally streamed `cuda_version_`; corrected here to `ort_version_`.)
Same file, `GetCapability`:

```diff
@@ -2453,13 +2457,13 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   // So, simply return the ComputeCapability here.
   if (graph.NumberOfNodes() == 1 && GraphHasCtxNode(graph)) {
     SubGraph_t supported_node_vector = {{0}, true};
-    std::unique_ptr<IndexedSubGraph> sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph), 0);
+    std::unique_ptr<IndexedSubGraph> sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_), ort_version_), 0);
     result.push_back(ComputeCapability::Create(std::move(sub_graph)));
     return result;
   }

   // Generate unique kernel name for TRT graph
-  HashValue model_hash = TRTGenerateId(graph);
+  HashValue model_hash = TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_), ort_version_);

   // Get supported node list from TensorRT parser
   const int number_of_ort_nodes = graph.NumberOfNodes();
```
onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h:

```diff
@@ -333,6 +333,8 @@ class TensorrtExecutionProvider : public IExecutionProvider {

   // The format is as for TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH
   int32_t trt_version_;
+  int32_t cuda_version_;
+  std::string ort_version_;

   // The OrtAllocator object will be obtained during EP compute time
   // and should be kept for the lifetime of the TRT EP object.
```
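Both new integer members hold packed encodings: TRT packs (MAJOR * 100 + MINOR) * 100 + PATCH per the comment above, and the CUDA runtime documents `cudaRuntimeGetVersion()` as returning 1000 * major + 10 * minor. A short sketch of decoding them (the function name is illustrative, not from the PR):

```cpp
// Sketch: decoding the packed version integers stored in trt_version_ / cuda_version_.
// TensorRT: (MAJOR * 100 + MINOR) * 100 + PATCH, e.g. 100300 -> 10.3.0
// CUDA runtime: 1000 * major + 10 * minor, e.g. 12040 -> 12.4
#include <cstdint>
#include <cstdio>

void PrintVersions(int32_t trt_version, int32_t cuda_version) {
  std::printf("TensorRT %d.%d.%d\n",
              trt_version / 10000, (trt_version / 100) % 100, trt_version % 100);
  std::printf("CUDA %d.%d\n",
              cuda_version / 1000, (cuda_version % 1000) / 10);
}

int main() {
  PrintVersions(100300, 12040);  // prints "TensorRT 10.3.0" and "CUDA 12.4"
  return 0;
}
```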
onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h:

```diff
@@ -520,7 +520,7 @@ void RemoveCachesByType(const std::string& root, std::string file_extension) {
  * compiled kernels, so the name must be unique and deterministic across models and sessions.
  * </remarks>
  */
-HashValue TRTGenerateId(const GraphViewer& graph_viewer) {
+HashValue TRTGenerateId(const GraphViewer& graph_viewer, std::string trt_version, std::string cuda_version, std::string ort_version) {
   HashValue model_hash = 0;

   // find the top level graph
@@ -579,16 +579,15 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer) {
 #endif

 #ifdef ORT_VERSION
-  hash_str(ORT_VERSION);
+  hash_str(ort_version);
 #endif

 #ifdef CUDA_VERSION
-  hash_str(std::to_string(CUDA_VERSION));
+  hash_str(cuda_version);
 #endif

 #if defined(NV_TENSORRT_MAJOR) && defined(NV_TENSORRT_MINOR)
-  std::string TRT_VERSION = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR);
-  hash_str(TRT_VERSION);
+  hash_str(trt_version);
 #endif

   model_hash = hash[0] | (uint64_t(hash[1]) << 32);
```
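The `hash_str` helper seen above is ORT's internal MurmurHash3-based string hasher, and the final ID packs two 32-bit lanes into a 64-bit value. The sketch below imitates the overall scheme with `std::hash` standing in for `hash_str`, purely to illustrate the cache-invalidation property this PR targets: changing any version string changes the resulting ID.

```cpp
// Illustrative sketch only: std::hash stands in for ORT's internal hash_str
// (MurmurHash3-based), and graph_hash stands in for the hashed model contents.
#include <cstdint>
#include <functional>
#include <string>

uint64_t GenerateIdSketch(uint64_t graph_hash,
                          const std::string& trt_version,
                          const std::string& cuda_version,
                          const std::string& ort_version) {
  std::hash<std::string> hash_str;
  uint64_t id = graph_hash;
  // Fold each runtime version string into the ID; any change to TRT, CUDA, or
  // ORT at run time therefore yields a different ID and a fresh engine cache.
  for (const std::string& s : {ort_version, cuda_version, trt_version}) {
    id ^= hash_str(s) + 0x9e3779b97f4a7c15ULL + (id << 6) + (id >> 2);
  }
  return id;
}
```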
onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc (7 additions, 3 deletions):

```diff
@@ -342,8 +342,12 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) {
   Graph& graph = model->MainGraph();
   GraphViewer viewer(graph);

+  std::string trt_version = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR);
+  std::string cuda_version = std::to_string(CUDA_VERSION);
+  std::string ort_version = ORT_VERSION;
+
   // get the hash for the model when loaded from file
-  HashValue model_hash = TRTGenerateId(viewer);
+  HashValue model_hash = TRTGenerateId(viewer, trt_version, cuda_version, ort_version);
   ASSERT_NE(model_hash, 0);

   // now load the model from bytes and check the hash differs
@@ -358,7 +362,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) {
   // Test loading same model from file and byte stream. Hash values should be different
   Graph& graph2 = model2->MainGraph();
   GraphViewer viewer2(graph2);
-  HashValue model_hash2 = TRTGenerateId(viewer2);
+  HashValue model_hash2 = TRTGenerateId(viewer2, trt_version, cuda_version, ort_version);
   ASSERT_NE(model_hash, model_hash2);
@@ -367,7 +371,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) {
   ASSERT_TRUE(Model::Load(model_path, model3, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK());
   Graph& graph3 = model3->MainGraph();
   GraphViewer viewer3(graph3);
-  HashValue model_hash3 = TRTGenerateId(viewer3);
+  HashValue model_hash3 = TRTGenerateId(viewer3, trt_version, cuda_version, ort_version);
   ASSERT_EQ(model_hash, model_hash3) << "models 1 and 3 are the same model and should have the same hash, no matter where they are loaded from";
 }
```
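A hypothetical follow-on assertion (not part of this PR) would pin down the property the change is after: the same model hashed under a different version string must yield a different ID.

```cpp
// Hypothetical extra check, not in the PR: a different TRT version string must
// change the ID, since stale engine caches would otherwise be reused. Reuses
// viewer3, model_hash3, cuda_version, and ort_version from the test above.
HashValue model_hash4 = TRTGenerateId(viewer3, "99.9", cuda_version, ort_version);
ASSERT_NE(model_hash3, model_hash4);
```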
