Skip to content

Commit e8f86ab

Browse files
committed
Update TensorrtAPI to TensorRT 10
* delete retrieve_indices_by_name()
* add member SampleUniquePtr<IRuntime> runtime
* replace getBindingDimensions() by getTensorShape()
* replace setBindingDimensions() by setInputShape()
* add link_libraries(stdc++fs) to CMakeLists.txt
* add include_directories("$ENV{TENSORRT_PATH}/samples/") to CMakeLists.txt
1 parent 09b5b5a commit e8f86ab

File tree

5 files changed

+30
-68
lines changed

5 files changed

+30
-68
lines changed

engine/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ include_directories("src/domain/crazyhouse")
402402
include_directories("src/agents")
403403
include_directories("src/agents/config")
404404
include_directories("src/nn")
405-
405+
link_libraries(stdc++fs)
406406

407407
if (BACKEND_MXNET)
408408
IF(DEFINED ENV{MXNET_PATH})
@@ -487,6 +487,7 @@ if (BACKEND_TENSORRT)
487487
endif()
488488
include_directories("$ENV{TENSORRT_PATH}/include")
489489
include_directories("$ENV{TENSORRT_PATH}/samples/common/")
490+
include_directories("$ENV{TENSORRT_PATH}/samples/")
490491
add_definitions(-DTENSORRT)
491492
endif()
492493

engine/src/environments/chess_related/chessbatchstream.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ int ChessBatchStream::getBatchSize() const
152152

153153
nvinfer1::Dims ChessBatchStream::getDims() const
154154
{
155-
Dims dims;
155+
nvinfer1::Dims dims;
156156
dims.nbDims = 4;
157157
dims.d[0] = mBatchSize;
158158
dims.d[1] = mDims.d[0];

engine/src/environments/chess_related/chessbatchstream.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ class ChessBatchStream : public IBatchStream
6868
int mBatchSize{0};
6969
int mBatchCount{0};
7070
int mMaxBatches{0};
71-
Dims mDims{};
71+
nvinfer1::Dims mDims{};
7272
std::vector<float> mData;
7373
std::vector<float> mLabels{};
7474
};

engine/src/nn/tensorrtapi.cpp

Lines changed: 23 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -88,59 +88,15 @@ void TensorrtAPI::load_parameters()
8888
// do nothing
8989
}
9090

91-
bool TensorrtAPI::retrieve_indices_by_name(bool verbose)
92-
{
93-
idxInput = engine->getBindingIndex(nnDesign.inputLayerName.c_str());
94-
if (idxInput == -1) {
95-
info_string_important("Layer name '" + nnDesign.inputLayerName + "' not found.");
96-
return false;
97-
}
98-
idxValueOutput = engine->getBindingIndex(nnDesign.valueOutputName.c_str());
99-
if (idxValueOutput == -1) {
100-
info_string_important("Layer name '" + nnDesign.valueOutputName + "' not found.");
101-
return false;
102-
}
103-
idxPolicyOutput = engine->getBindingIndex(nnDesign.policySoftmaxOutputName.c_str());
104-
if (idxPolicyOutput == -1) {
105-
info_string_important("Layer name '" + nnDesign.policySoftmaxOutputName + "' not found.");
106-
return false;
107-
}
108-
if (nnDesign.hasAuxiliaryOutputs) {
109-
idxAuxiliaryOutput = engine->getBindingIndex(nnDesign.auxiliaryOutputName.c_str());
110-
if (idxAuxiliaryOutput == -1) {
111-
info_string_important("Layer name '" + nnDesign.auxiliaryOutputName + "' not found.");
112-
return false;
113-
}
114-
}
115-
if (verbose) {
116-
info_string("Found 'idxInput' at index", idxInput);
117-
info_string("Found 'idxValueOutput' at index", idxValueOutput);
118-
info_string("Found 'idxPolicyOutput' at index", idxPolicyOutput);
119-
if (nnDesign.hasAuxiliaryOutputs) {
120-
info_string("Found 'idxAuxiliaryOutput' at index", idxAuxiliaryOutput);
121-
}
122-
}
123-
return true;
124-
}
125-
12691
void TensorrtAPI::init_nn_design()
12792
{
128-
nnDesign.hasAuxiliaryOutputs = engine->getNbBindings() > 3;
129-
if (!retrieve_indices_by_name(generatedTrtFromONNX)) {
130-
info_string_important("Fallback to default indices.");
131-
idxInput = nnDesign.inputIdx;
132-
idxValueOutput = nnDesign.valueOutputIdx + nnDesign.nbInputs;
133-
idxPolicyOutput = nnDesign.policyOutputIdx + nnDesign.nbInputs;
134-
idxAuxiliaryOutput = nnDesign.auxiliaryOutputIdx + nnDesign.nbInputs;
135-
}
136-
137-
set_shape(nnDesign.inputShape, engine->getBindingDimensions(idxInput));
93+
set_shape(nnDesign.inputShape, engine->getTensorShape(nnDesign.inputLayerName.c_str()));
13894
// make sure that the first dimension is the batch size, otherwise '-1' could cause problems
13995
nnDesign.inputShape.v[0] = batchSize;
140-
set_shape(nnDesign.valueOutputShape, engine->getBindingDimensions(idxValueOutput));
141-
set_shape(nnDesign.policyOutputShape, engine->getBindingDimensions(idxPolicyOutput));
96+
set_shape(nnDesign.valueOutputShape, engine->getTensorShape(nnDesign.valueOutputName.c_str()));
97+
set_shape(nnDesign.policyOutputShape, engine->getTensorShape(nnDesign.policySoftmaxOutputName.c_str()));
14298
if (nnDesign.hasAuxiliaryOutputs) {
143-
set_shape(nnDesign.auxiliaryOutputShape, engine->getBindingDimensions(idxAuxiliaryOutput));
99+
set_shape(nnDesign.auxiliaryOutputShape, engine->getTensorShape(nnDesign.auxiliaryOutputName.c_str()));
144100
}
145101
nnDesign.isPolicyMap = unsigned(nnDesign.policyOutputShape.v[1]) != StateConstants::NB_LABELS();
146102
}
@@ -151,7 +107,7 @@ void TensorrtAPI::bind_executor()
151107
context = SampleUniquePtr<nvinfer1::IExecutionContext>(engine->createExecutionContext());
152108
Dims inputDims;
153109
set_dims(inputDims, nnDesign.inputShape);
154-
context->setBindingDimensions(0, inputDims);
110+
context->setInputShape(nnDesign.inputLayerName.c_str(), inputDims);
155111

156112
// create buffers object with respect to the engine and batch size
157113
CHECK(cudaStreamCreate(&stream));
@@ -184,8 +140,19 @@ void TensorrtAPI::predict(float* inputPlanes, float* valueOutput, float* probOut
184140
CHECK(cudaMemcpyAsync(deviceMemory[idxInput], inputPlanes, memorySizes[idxInput],
185141
cudaMemcpyHostToDevice, stream));
186142

143+
context->setTensorAddress(nnDesign.inputLayerName.c_str(), deviceMemory[idxInput]);
144+
context->setTensorAddress(nnDesign.valueOutputName.c_str(), deviceMemory[idxValueOutput]);
145+
context->setTensorAddress(nnDesign.policySoftmaxOutputName.c_str(), deviceMemory[idxPolicyOutput]);
146+
#ifdef DYNAMIC_NN_ARCH
147+
if (has_auxiliary_outputs()) {
148+
#else
149+
if (StateConstants::NB_AUXILIARY_OUTPUTS()) {
150+
#endif
151+
context->setTensorAddress(nnDesign.auxiliaryOutputName.c_str(), deviceMemory[idxAuxiliaryOutput]);
152+
}
153+
187154
// run inference for given data
188-
context->enqueueV2(deviceMemory, stream, nullptr);
155+
context->enqueueV3(stream);
189156

190157
// copy output from device back to host
191158
CHECK(cudaMemcpyAsync(valueOutput, deviceMemory[idxValueOutput],
@@ -209,7 +176,6 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
209176
info_string("This may take a few minutes...");
210177
// create an engine builder
211178
SampleUniquePtr<IBuilder> builder = SampleUniquePtr<IBuilder>(createInferBuilder(gLogger.getTRTLogger()));
212-
builder->setMaxBatchSize(int(batchSize));
213179

214180
// create an ONNX network object
215181
const uint32_t explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
@@ -232,7 +198,7 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
232198
SampleUniquePtr<nvinfer1::IBuilderConfig> config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
233199
unique_ptr<IInt8Calibrator> calibrator;
234200
unique_ptr<IBatchStream> calibrationStream;
235-
set_config_settings(config, 1_GiB, calibrator, calibrationStream);
201+
set_config_settings(config, calibrator, calibrationStream);
236202

237203
IOptimizationProfile* profile = builder->createOptimizationProfile();
238204

@@ -243,12 +209,14 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
243209
profile->setDimensions(nnDesign.inputLayerName.c_str(), OptProfileSelector::kMAX, inputDims);
244210
config->addOptimizationProfile(profile);
245211

212+
nnDesign.hasAuxiliaryOutputs = network->getNbOutputs() > 2;
213+
246214
// build an engine from the TensorRT network with a given configuration struct
247215
#ifdef TENSORRT7
248216
return builder->buildEngineWithConfig(*network, *config);
249217
#else
250218
SampleUniquePtr<IHostMemory> serializedModel{builder->buildSerializedNetwork(*network, *config)};
251-
SampleUniquePtr<IRuntime> runtime{createInferRuntime(sample::gLogger.getTRTLogger())};
219+
runtime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
252220

253221
// build an engine from the serialized model
254222
return runtime->deserializeCudaEngine(serializedModel->data(), serializedModel->size());;
@@ -263,7 +231,7 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
263231
const char* buffer = read_buffer(trtFilePath, bufferSize);
264232
if (buffer) {
265233
info_string("deserialize engine:", trtFilePath);
266-
unique_ptr<IRuntime, samplesCommon::InferDeleter> runtime{createInferRuntime(gLogger)};
234+
runtime = unique_ptr<IRuntime, samplesCommon::InferDeleter>{createInferRuntime(gLogger)};
267235
#ifdef TENSORRT7
268236
engine = runtime->deserializeCudaEngine(buffer, bufferSize, nullptr);
269237
#else
@@ -293,10 +261,9 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
293261
}
294262

295263
void TensorrtAPI::set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
296-
size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
264+
unique_ptr<IInt8Calibrator>& calibrator,
297265
unique_ptr<IBatchStream>& calibrationStream)
298266
{
299-
config->setMaxWorkspaceSize(maxWorkspace);
300267
switch (precision) {
301268
case float32:
302269
// default: do nothing

engine/src/nn/tensorrtapi.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "BatchStream.h"
4545

4646
using namespace std;
47+
using namespace nvinfer1;
4748

4849
enum Precision {
4950
float32,
@@ -77,6 +78,7 @@ class TensorrtAPI : public NeuralNetAPI
7778
string trtFilePath;
7879
std::shared_ptr<nvinfer1::ICudaEngine> engine;
7980
SampleUniquePtr<nvinfer1::IExecutionContext> context;
81+
SampleUniquePtr<IRuntime> runtime;
8082
cudaStream_t stream;
8183
bool generatedTrtFromONNX;
8284
public:
@@ -93,13 +95,6 @@ class TensorrtAPI : public NeuralNetAPI
9395

9496
void predict(float* inputPlanes, float* valueOutput, float* probOutputs, float* auxiliaryOutputs) override;
9597

96-
/**
97-
* @brief retrieve_indices_by_name Sets the layer name indices by names.
98-
* @param verbose If true debug info will be shown
99-
* @return True if all layer names were found, else false
100-
*/
101-
bool retrieve_indices_by_name(bool verbose);
102-
10398
private:
10499
void load_model() override;
105100
void load_parameters() override;
@@ -123,12 +118,11 @@ class TensorrtAPI : public NeuralNetAPI
123118
/**
124119
* @brief set_config_settings Sets the configuration object which will be later used to build the engine
125120
* @param config Configuration object
126-
* @param maxWorkspace Maximum allowable GPU work space for TensorRT tactic selection (e.g. 16_MiB, 1_GiB)
127121
* @param calibrator INT8 calibration object
128122
* @param calibrationStream Calibration stream used for INT8 calibration
129123
*/
130124
void set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
131-
size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
125+
unique_ptr<IInt8Calibrator>& calibrator,
132126
unique_ptr<IBatchStream>& calibrationStream);
133127

134128

0 commit comments

Comments (0)