"""Compile an ExportedProgram module for NVIDIA GPUs using TensorRT
@@ -158,6 +160,8 @@ def compile(
         engine_cache_dir (Optional[str]): Directory to store the cached TRT engines
         engine_cache_size (Optional[int]): Maximum hard-disk space (bytes) to use for the engine cache, default is 1GB. If the cache exceeds this size, the oldest engines will be removed by default
         custom_engine_cache (Optional[BaseEngineCache]): Engine cache instance to use for saving and loading engines. Users can provide their own engine cache by inheriting from BaseEngineCache. If used, engine_cache_dir and engine_cache_size will be ignored.
+        use_strong_types (bool): Enable strong typing in TensorRT compilation
+        use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions.
         **kwargs: Any,
     Returns:
         torch.fx.GraphModule: Compiled FX Module, when run it will execute via TensorRT
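Reviewer note: a minimal usage sketch for the two new compile flags. It assumes the flags are forwarded through the public `torch_tensorrt.compile` entry point via `**kwargs` (as the docstring above suggests); the toy model, input shapes, and the choice of `enabled_precisions` are illustrative and not part of this diff.

```python
import torch
import torch_tensorrt

# Toy FP16 model containing matmuls (hypothetical, for illustration only).
model = (
    torch.nn.Sequential(
        torch.nn.Linear(64, 64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 8),
    )
    .half()
    .eval()
    .cuda()
)
inputs = [torch.randn(1, 64, dtype=torch.half, device="cuda")]

# use_fp32_acc is meant to be paired with FP16 in enabled_precisions;
# use_strong_types (per the check added below) expects only torch.float32.
trt_model = torch_tensorrt.compile(
    model,
    ir="dynamo",
    inputs=inputs,
    enabled_precisions={torch.half},
    use_fp32_acc=True,
)
```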
@@ -197,6 +201,20 @@ def compile(
             "\nThis feature is unimplemented in Torch-TRT Dynamo currently."
+            f"When use_strong_types is enabled, only torch.float32 is allowed in the enabled_precisions but found {enabled_precisions}"
+        )
+
+    if use_fp32_acc:
+        logger.debug(
+            "FP32 accumulation for matmul layers is enabled. This option should only be enabled if the model already has FP16 weights and has no effect if it has FP32 weights. \
+            This flag inserts casts around matmul layers and ensures TensorRT executes the matmul layers in FP16 with FP32 accumulation."
+        )
+
     # Aliasing inputs to arg_inputs for better understanding
     if not arg_inputs and not inputs:
         raise AssertionError("'arg_inputs' and 'inputs' should not both be None.")
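For context on why the cast insertion matters: the plain-PyTorch snippet below (not Torch-TRT's actual lowering pass) illustrates the numerical difference between accumulating a matmul in FP16 and casting the operands to FP32 so the accumulation happens there, which is the behavior the inserted casts request from TensorRT.

```python
import torch

# A large reduction dimension makes FP16 accumulation error visible.
a = torch.randn(64, 4096, dtype=torch.half, device="cuda")
b = torch.randn(4096, 64, dtype=torch.half, device="cuda")

# Plain FP16 matmul: accumulation precision is backend dependent.
out_fp16 = a @ b

# Casting the operands to FP32 forces FP32 accumulation; the result is
# cast back to FP16 afterwards, mirroring what the inserted casts express.
out_fp32_acc = (a.float() @ b.float()).half()

# Reference computed in FP64 for comparison.
ref = (a.double() @ b.double()).half()
print("fp16-accumulation max error:", (out_fp16 - ref).abs().max().item())
print("fp32-accumulation max error:", (out_fp32_acc - ref).abs().max().item())
```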
         calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
         allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT
         timing_cache_path (str): Path to the timing cache if it exists (or) where it will be saved after compilation
+        use_strong_types (bool): Enable strong typing in TensorRT compilation
+        use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions.
     Returns:
         bytes: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs
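If this docstring belongs to the serialized-engine path (the return type suggests `convert_exported_program_to_serialized_trt_engine`), usage would look roughly like the sketch below; the entry-point name, the kwargs forwarding of the new flags, and the output file name are assumptions, not part of this diff.

```python
import torch
import torch_tensorrt

model = torch.nn.Linear(32, 32).half().eval().cuda()
example_inputs = (torch.randn(8, 32, dtype=torch.half, device="cuda"),)
exp_program = torch.export.export(model, example_inputs)

# Assumed entry point and kwargs forwarding for the new flags.
serialized_engine = torch_tensorrt.dynamo.convert_exported_program_to_serialized_trt_engine(
    exp_program,
    inputs=list(example_inputs),
    enabled_precisions={torch.half},
    use_fp32_acc=True,
)

# The returned bytes can be written to disk or deserialized via TensorRT APIs.
with open("model.engine", "wb") as f:
    f.write(serialized_engine)
```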
py/torch_tensorrt/dynamo/_settings.py (+6)
@@ -30,7 +30,9 @@
     TIMING_CACHE_PATH,
     TRUNCATE_DOUBLE,
     USE_FAST_PARTITIONER,
+    USE_FP32_ACC,
     USE_PYTHON_RUNTIME,
+    USE_STRONG_TYPES,
     VERSION_COMPATIBLE,
     WORKSPACE_SIZE,
     default_device,
@@ -78,6 +80,8 @@ class CompilationSettings:
         timing_cache_path (str): Path to the timing cache if it exists (or) where it will be saved after compilation
         cache_built_engines (bool): Whether to save the compiled TRT engines to storage
         reuse_cached_engines (bool): Whether to load the compiled TRT engines from storage
+        use_strong_types (bool): Enable strong typing in TensorRT compilation
+        use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions.
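For completeness, a sketch of constructing the settings dataclass directly with the two new fields; it assumes `CompilationSettings` is importable from `torch_tensorrt.dynamo` and that the field defaults mirror the `USE_STRONG_TYPES` / `USE_FP32_ACC` constants imported above.

```python
from torch_tensorrt.dynamo import CompilationSettings

# Dataclass construction exercising the two fields added in this diff.
settings = CompilationSettings(
    use_strong_types=False,
    use_fp32_acc=True,
)
print(settings.use_fp32_acc, settings.use_strong_types)
```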