@@ -88,59 +88,15 @@ void TensorrtAPI::load_parameters()
     // do nothing
 }
 
-bool TensorrtAPI::retrieve_indices_by_name(bool verbose)
-{
-    idxInput = engine->getBindingIndex(nnDesign.inputLayerName.c_str());
-    if (idxInput == -1) {
-        info_string_important("Layer name '" + nnDesign.inputLayerName + "' not found.");
-        return false;
-    }
-    idxValueOutput = engine->getBindingIndex(nnDesign.valueOutputName.c_str());
-    if (idxValueOutput == -1) {
-        info_string_important("Layer name '" + nnDesign.valueOutputName + "' not found.");
-        return false;
-    }
-    idxPolicyOutput = engine->getBindingIndex(nnDesign.policySoftmaxOutputName.c_str());
-    if (idxPolicyOutput == -1) {
-        info_string_important("Layer name '" + nnDesign.policySoftmaxOutputName + "' not found.");
-        return false;
-    }
-    if (nnDesign.hasAuxiliaryOutputs) {
-        idxAuxiliaryOutput = engine->getBindingIndex(nnDesign.auxiliaryOutputName.c_str());
-        if (idxAuxiliaryOutput == -1) {
-            info_string_important("Layer name '" + nnDesign.auxiliaryOutputName + "' not found.");
-            return false;
-        }
-    }
-    if (verbose) {
-        info_string("Found 'idxInput' at index", idxInput);
-        info_string("Found 'idxValueOutput' at index", idxValueOutput);
-        info_string("Found 'idxPolicyOutput' at index", idxPolicyOutput);
-        if (nnDesign.hasAuxiliaryOutputs) {
-            info_string("Found 'idxAuxiliaryOutput' at index", idxAuxiliaryOutput);
-        }
-    }
-    return true;
-}
-
 void TensorrtAPI::init_nn_design()
 {
-    nnDesign.hasAuxiliaryOutputs = engine->getNbBindings() > 3;
-    if (!retrieve_indices_by_name(generatedTrtFromONNX)) {
-        info_string_important("Fallback to default indices.");
-        idxInput = nnDesign.inputIdx;
-        idxValueOutput = nnDesign.valueOutputIdx + nnDesign.nbInputs;
-        idxPolicyOutput = nnDesign.policyOutputIdx + nnDesign.nbInputs;
-        idxAuxiliaryOutput = nnDesign.auxiliaryOutputIdx + nnDesign.nbInputs;
-    }
-
-    set_shape(nnDesign.inputShape, engine->getBindingDimensions(idxInput));
+    set_shape(nnDesign.inputShape, engine->getTensorShape(nnDesign.inputLayerName.c_str()));
     // make sure that the first dimension is the batch size, otherwise '-1' could cause problems
     nnDesign.inputShape.v[0] = batchSize;
-    set_shape(nnDesign.valueOutputShape, engine->getBindingDimensions(idxValueOutput));
-    set_shape(nnDesign.policyOutputShape, engine->getBindingDimensions(idxPolicyOutput));
+    set_shape(nnDesign.valueOutputShape, engine->getTensorShape(nnDesign.valueOutputName.c_str()));
+    set_shape(nnDesign.policyOutputShape, engine->getTensorShape(nnDesign.policySoftmaxOutputName.c_str()));
     if (nnDesign.hasAuxiliaryOutputs) {
-        set_shape(nnDesign.auxiliaryOutputShape, engine->getBindingDimensions(idxAuxiliaryOutput));
+        set_shape(nnDesign.auxiliaryOutputShape, engine->getTensorShape(nnDesign.auxiliaryOutputName.c_str()));
     }
     nnDesign.isPolicyMap = unsigned(nnDesign.policyOutputShape.v[1]) != StateConstants::NB_LABELS();
 }
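Note on this hunk: the binding-index API (`getBindingIndex()`, `getBindingDimensions()`, `getNbBindings()`) was removed in TensorRT 10, so the entire `retrieve_indices_by_name()` helper and its fallback-index path become obsolete: I/O tensors are now addressed directly by name. A minimal sketch of the equivalent existence check under the new API, assuming TensorRT >= 8.5 (`shape_or_fail` is a hypothetical helper, not part of this patch):

```cpp
#include <NvInfer.h>
#include <iostream>

// Look up a tensor shape by name; per the TensorRT docs, getTensorShape()
// returns Dims with nbDims == -1 when the name is unknown, which replaces
// the old "getBindingIndex() == -1" check.
nvinfer1::Dims shape_or_fail(const nvinfer1::ICudaEngine& engine, const char* name)
{
    const nvinfer1::Dims dims = engine.getTensorShape(name);
    if (dims.nbDims == -1) {
        std::cerr << "Tensor '" << name << "' not found in engine." << std::endl;
    }
    return dims;
}
```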
@@ -151,7 +107,7 @@ void TensorrtAPI::bind_executor()
     context = SampleUniquePtr<nvinfer1::IExecutionContext>(engine->createExecutionContext());
     Dims inputDims;
     set_dims(inputDims, nnDesign.inputShape);
-    context->setBindingDimensions(0, inputDims);
+    context->setInputShape(nnDesign.inputLayerName.c_str(), inputDims);
 
     // create buffers object with respect to the engine and batch size
     CHECK(cudaStreamCreate(&stream));
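Likewise, `setBindingDimensions(0, ...)` gives way to the name-based `setInputShape()`. A sketch of the call pattern, assuming TensorRT >= 8.5 (`set_batch` and the tensor name are illustrative):

```cpp
#include <NvInfer.h>

// Fix the dynamic batch dimension on an execution context by tensor name.
// setInputShape() returns false if the shape violates the engine's profile.
bool set_batch(nvinfer1::IExecutionContext& context, const char* inputName,
               nvinfer1::Dims dims, int batchSize)
{
    dims.d[0] = batchSize;  // first dimension carries the batch size
    return context.setInputShape(inputName, dims);
}
```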
@@ -184,8 +140,19 @@ void TensorrtAPI::predict(float* inputPlanes, float* valueOutput, float* probOut
     CHECK(cudaMemcpyAsync(deviceMemory[idxInput], inputPlanes, memorySizes[idxInput],
                           cudaMemcpyHostToDevice, stream));
 
+    context->setTensorAddress(nnDesign.inputLayerName.c_str(), deviceMemory[idxInput]);
+    context->setTensorAddress(nnDesign.valueOutputName.c_str(), deviceMemory[idxValueOutput]);
+    context->setTensorAddress(nnDesign.policySoftmaxOutputName.c_str(), deviceMemory[idxPolicyOutput]);
+#ifdef DYNAMIC_NN_ARCH
+    if (has_auxiliary_outputs()) {
+#else
+    if (StateConstants::NB_AUXILIARY_OUTPUTS()) {
+#endif
+        context->setTensorAddress(nnDesign.auxiliaryOutputName.c_str(), deviceMemory[idxAuxiliaryOutput]);
+    }
+
     // run inference for given data
-    context->enqueueV2(deviceMemory, stream, nullptr);
+    context->enqueueV3(stream);
 
     // copy output from device back to host
     CHECK(cudaMemcpyAsync(valueOutput, deviceMemory[idxValueOutput],
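The key change in `predict()`: `enqueueV2()` took the array of device pointers directly, while `enqueueV3()` takes only the stream, so each device buffer must first be registered per tensor name via `setTensorAddress()`. A condensed sketch of that convention (tensor names and buffers are placeholders):

```cpp
#include <NvInfer.h>
#include <cuda_runtime_api.h>

// enqueueV3() convention: register every I/O address by name, then enqueue.
bool run_inference(nvinfer1::IExecutionContext& context, cudaStream_t stream,
                   void* inputDev, void* valueDev, void* policyDev)
{
    if (!context.setTensorAddress("input", inputDev) ||
        !context.setTensorAddress("value_out", valueDev) ||
        !context.setTensorAddress("policy_out", policyDev)) {
        return false;  // unknown tensor name or invalid address
    }
    return context.enqueueV3(stream);  // replaces enqueueV2(deviceMemory, stream, nullptr)
}
```

Since the buffers are reused across calls here, the addresses could also be registered once in `bind_executor()`; re-setting them on every `predict()` call, as the patch does, is cheap and harmless.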
@@ -209,7 +176,6 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
     info_string("This may take a few minutes...");
     // create an engine builder
     SampleUniquePtr<IBuilder> builder = SampleUniquePtr<IBuilder>(createInferBuilder(gLogger.getTRTLogger()));
-    builder->setMaxBatchSize(int(batchSize));
 
     // create an ONNX network object
     const uint32_t explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
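`setMaxBatchSize()` only applied to implicit-batch networks and is removed in TensorRT 10; with `kEXPLICIT_BATCH` the batch size travels in the tensor dimensions and is bounded by the optimization profile set up below. A sketch of the usual kMIN/kOPT/kMAX triple (the helper and the choice of bounds are illustrative, not this project's exact profile):

```cpp
#include <NvInfer.h>

// Bound the dynamic batch dimension through an optimization profile instead
// of the removed builder->setMaxBatchSize().
void add_batch_profile(nvinfer1::IBuilder& builder, nvinfer1::IBuilderConfig& config,
                       const char* inputName, nvinfer1::Dims dims, int maxBatchSize)
{
    nvinfer1::IOptimizationProfile* profile = builder.createOptimizationProfile();
    dims.d[0] = 1;             // smallest batch the engine must accept
    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMIN, dims);
    dims.d[0] = maxBatchSize;  // tune for, and allow up to, the full batch
    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kOPT, dims);
    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMAX, dims);
    config.addOptimizationProfile(profile);
}
```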
@@ -232,7 +198,7 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
     SampleUniquePtr<nvinfer1::IBuilderConfig> config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
     unique_ptr<IInt8Calibrator> calibrator;
     unique_ptr<IBatchStream> calibrationStream;
-    set_config_settings(config, 1_GiB, calibrator, calibrationStream);
+    set_config_settings(config, calibrator, calibrationStream);
 
     IOptimizationProfile* profile = builder->createOptimizationProfile();
@@ -243,12 +209,14 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
     profile->setDimensions(nnDesign.inputLayerName.c_str(), OptProfileSelector::kMAX, inputDims);
     config->addOptimizationProfile(profile);
 
+    nnDesign.hasAuxiliaryOutputs = network->getNbOutputs() > 2;
+
     // build an engine from the TensorRT network with a given configuration struct
 #ifdef TENSORRT7
     return builder->buildEngineWithConfig(*network, *config);
 #else
     SampleUniquePtr<IHostMemory> serializedModel{builder->buildSerializedNetwork(*network, *config)};
-    SampleUniquePtr<IRuntime> runtime{createInferRuntime(sample::gLogger.getTRTLogger())};
+    runtime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
 
     // build an engine from the serialized model
     return runtime->deserializeCudaEngine(serializedModel->data(), serializedModel->size());
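Promoting `runtime` from a local to a member appears to be a lifetime fix: the `IRuntime` that deserializes an engine must stay alive for as long as that engine, and a local `unique_ptr` would destroy it on return. A sketch of the safe ownership layout (the type and member names are illustrative, and it assumes TensorRT >= 8, where interface objects may be destroyed with plain `delete`):

```cpp
#include <NvInfer.h>
#include <memory>

// Members are destroyed in reverse declaration order, so the engine is
// released before the runtime that created it.
struct EngineHolder {
    std::unique_ptr<nvinfer1::IRuntime> runtime;    // declared first, destroyed last
    std::unique_ptr<nvinfer1::ICudaEngine> engine;  // must not outlive the runtime
};
```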
@@ -263,7 +231,7 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
     const char* buffer = read_buffer(trtFilePath, bufferSize);
     if (buffer) {
         info_string("deserialize engine:", trtFilePath);
-        unique_ptr<IRuntime, samplesCommon::InferDeleter> runtime{createInferRuntime(gLogger)};
+        runtime = unique_ptr<IRuntime, samplesCommon::InferDeleter>{createInferRuntime(gLogger)};
 #ifdef TENSORRT7
         engine = runtime->deserializeCudaEngine(buffer, bufferSize, nullptr);
 #else
@@ -293,10 +261,9 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
 }
 
 void TensorrtAPI::set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
-                                      size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
+                                      unique_ptr<IInt8Calibrator>& calibrator,
                                       unique_ptr<IBatchStream>& calibrationStream)
 {
-    config->setMaxWorkspaceSize(maxWorkspace);
     switch (precision) {
     case float32:
         // default: do nothing
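`setMaxWorkspaceSize()` is also removed in TensorRT 10, which is why the `maxWorkspace` parameter disappears; without a cap, the builder may use the device's available memory. If the old 1 GiB limit were still wanted, the replacement would be a workspace memory-pool limit, sketched here (the helper name is ours):

```cpp
#include <NvInfer.h>
#include <cstddef>

// Equivalent of the removed config->setMaxWorkspaceSize(bytes) on TensorRT >= 8.3.
void cap_workspace(nvinfer1::IBuilderConfig& config, std::size_t bytes)
{
    config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, bytes);
}

// Usage: cap_workspace(*config, 1ULL << 30);  // restore the old 1_GiB cap
```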