diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html b/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html index 6b14cda2bf..31e6b0f3e3 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html @@ -10,7 +10,7 @@ - Class DataType — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Class DataType — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
- v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
@@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html b/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html index 56e4276fa4..ec2f7172ba 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html @@ -10,7 +10,7 @@ - Class Device::DeviceType — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Class Device::DeviceType — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html b/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html index cdad014cac..70c50627f4 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html @@ -10,7 +10,7 @@ - Class TensorFormat — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Class TensorFormat — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html b/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html index 27ce473618..b8489e5efa 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html @@ -10,7 +10,7 @@ - Template Class Int8CacheCalibrator — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Template Class Int8CacheCalibrator — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html b/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html index 4988a1b0ef..c85615e854 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html @@ -10,7 +10,7 @@ - Template Class Int8Calibrator — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Template Class Int8Calibrator — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html b/docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html index 7f631fec1a..a433a4c113 100644 --- a/docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html +++ b/docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html @@ -10,7 +10,7 @@ - Define STR — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Define STR — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html b/docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html index c412cb96f5..9ffed532b5 100644 --- a/docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html +++ b/docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html @@ -10,7 +10,7 @@ - Define TORCH_TENSORRT_PATCH_VERSION — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Define TORCH_TENSORRT_PATCH_VERSION — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html b/docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html index 69b88a46af..f5b76d65ec 100644 --- a/docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html +++ b/docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html @@ -10,7 +10,7 @@ - Define TORCH_TENSORRT_MAJOR_VERSION — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Define TORCH_TENSORRT_MAJOR_VERSION — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html b/docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html index c3b04cdee3..516d81e51e 100644 --- a/docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html +++ b/docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html @@ -10,7 +10,7 @@ - Define TORCH_TENSORRT_MINOR_VERSION — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Define TORCH_TENSORRT_MINOR_VERSION — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html b/docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html index 99f3611481..9ea0983bbb 100644 --- a/docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html +++ b/docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html @@ -10,7 +10,7 @@ - Define TORCHTRT_API — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Define TORCHTRT_API — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html b/docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html index 7076dc2cd5..1dc0c8ad7c 100644 --- a/docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html +++ b/docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html @@ -10,7 +10,7 @@ - Define XSTR — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Define XSTR — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html b/docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html index bb6ce88626..16d42190a2 100644 --- a/docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html +++ b/docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html @@ -10,7 +10,7 @@ - Define TORCHTRT_HIDDEN — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Define TORCHTRT_HIDDEN — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html b/docs/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html index 970ac08fca..02ccfdfef0 100644 --- a/docs/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html +++ b/docs/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html @@ -10,7 +10,7 @@ - Define TORCH_TENSORRT_VERSION — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Define TORCH_TENSORRT_VERSION — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/dir_cpp.html b/docs/_cpp_api/dir_cpp.html index 489105cc05..f8d936ca00 100644 --- a/docs/_cpp_api/dir_cpp.html +++ b/docs/_cpp_api/dir_cpp.html @@ -10,7 +10,7 @@ - Directory cpp — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Directory cpp — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/dir_cpp_include.html b/docs/_cpp_api/dir_cpp_include.html index a4b8755400..5bd8e32dcb 100644 --- a/docs/_cpp_api/dir_cpp_include.html +++ b/docs/_cpp_api/dir_cpp_include.html @@ -10,7 +10,7 @@ - Directory include — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Directory include — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/dir_cpp_include_torch_tensorrt.html b/docs/_cpp_api/dir_cpp_include_torch_tensorrt.html index ceecc0de14..2b6ec936a2 100644 --- a/docs/_cpp_api/dir_cpp_include_torch_tensorrt.html +++ b/docs/_cpp_api/dir_cpp_include_torch_tensorrt.html @@ -10,7 +10,7 @@ - Directory torch_tensorrt — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Directory torch_tensorrt — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/enum_namespacetorch__tensorrt_1_1logging_1a130f65408ad8cbaee060f05e8db69558.html b/docs/_cpp_api/enum_namespacetorch__tensorrt_1_1logging_1a130f65408ad8cbaee060f05e8db69558.html index 2f8987f1a5..88d1bf933f 100644 --- a/docs/_cpp_api/enum_namespacetorch__tensorrt_1_1logging_1a130f65408ad8cbaee060f05e8db69558.html +++ b/docs/_cpp_api/enum_namespacetorch__tensorrt_1_1logging_1a130f65408ad8cbaee060f05e8db69558.html @@ -10,7 +10,7 @@ - Enum Level — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Enum Level — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/enum_namespacetorch__tensorrt_1a3fbe5d72e4fc624dbd038853079620eb.html b/docs/_cpp_api/enum_namespacetorch__tensorrt_1a3fbe5d72e4fc624dbd038853079620eb.html index 1350674c17..2bd0fe9493 100644 --- a/docs/_cpp_api/enum_namespacetorch__tensorrt_1a3fbe5d72e4fc624dbd038853079620eb.html +++ b/docs/_cpp_api/enum_namespacetorch__tensorrt_1a3fbe5d72e4fc624dbd038853079620eb.html @@ -10,7 +10,7 @@ - Enum EngineCapability — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Enum EngineCapability — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html b/docs/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html index f9248b45c5..14dd18617e 100644 --- a/docs/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html +++ b/docs/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html @@ -10,7 +10,7 @@ - File logging.h — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + File logging.h — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html b/docs/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html index c11b6ef60f..96c429fa9d 100644 --- a/docs/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html +++ b/docs/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html @@ -10,7 +10,7 @@ - File macros.h — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + File macros.h — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html b/docs/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html index b32d4cce28..2ad5cb396e 100644 --- a/docs/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html +++ b/docs/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html @@ -10,7 +10,7 @@ - File ptq.h — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + File ptq.h — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html b/docs/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html index bed67f4302..407cab6a57 100644 --- a/docs/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html +++ b/docs/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html @@ -10,7 +10,7 @@ - File torch_tensorrt.h — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + File torch_tensorrt.h — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a0593f776f469c20469e2f729fc7861a3.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a0593f776f469c20469e2f729fc7861a3.html index 4ec4666005..f7a5ad663a 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a0593f776f469c20469e2f729fc7861a3.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a0593f776f469c20469e2f729fc7861a3.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::get_logging_prefix — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::logging::get_logging_prefix — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a0c012cb374addd90eb1f42eaec570650.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a0c012cb374addd90eb1f42eaec570650.html index 5b551ff0e0..e552eadb27 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a0c012cb374addd90eb1f42eaec570650.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a0c012cb374addd90eb1f42eaec570650.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::get_reportable_log_level — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::logging::get_reportable_log_level — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a56e110feaaba2c3fd44bd201fd21a76a.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a56e110feaaba2c3fd44bd201fd21a76a.html index 36fa200764..25a9acee0a 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a56e110feaaba2c3fd44bd201fd21a76a.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a56e110feaaba2c3fd44bd201fd21a76a.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::get_is_colored_output_on — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::logging::get_is_colored_output_on — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a7cb50492421ea9de4e3db895819df6f2.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a7cb50492421ea9de4e3db895819df6f2.html index d8ab2dd614..ab7ca3c3d9 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a7cb50492421ea9de4e3db895819df6f2.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1a7cb50492421ea9de4e3db895819df6f2.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::set_reportable_log_level — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::logging::set_reportable_log_level — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1ac46ac0901cb97e3ae6e93b45f24e90b8.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1ac46ac0901cb97e3ae6e93b45f24e90b8.html index 7c017a305c..c91a7c1318 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1ac46ac0901cb97e3ae6e93b45f24e90b8.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1ac46ac0901cb97e3ae6e93b45f24e90b8.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::log — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::logging::log — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1ad2efd47b6c3689e58ccc595680579ae5.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1ad2efd47b6c3689e58ccc595680579ae5.html index f7bd0d0cda..6539a9fa08 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1ad2efd47b6c3689e58ccc595680579ae5.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1ad2efd47b6c3689e58ccc595680579ae5.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::set_is_colored_output_on — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::logging::set_is_colored_output_on — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1af8f3443813315af7901903d25dd495cc.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1af8f3443813315af7901903d25dd495cc.html index e7ca1b5e1d..cb3438fbd4 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1af8f3443813315af7901903d25dd495cc.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1logging_1af8f3443813315af7901903d25dd495cc.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::set_logging_prefix — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::logging::set_logging_prefix — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1ptq_1a226e3c83379d1012cde8578c1c86b16c.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1ptq_1a226e3c83379d1012cde8578c1c86b16c.html index 220d8622ae..39eb779e61 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1ptq_1a226e3c83379d1012cde8578c1c86b16c.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1ptq_1a226e3c83379d1012cde8578c1c86b16c.html @@ -10,7 +10,7 @@ - Template Function torch_tensorrt::ptq::make_int8_cache_calibrator — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Template Function torch_tensorrt::ptq::make_int8_cache_calibrator — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1ptq_1a6186e305f47c1d94b6130ef6c7f7e178.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1ptq_1a6186e305f47c1d94b6130ef6c7f7e178.html index f6b8c1b37b..68a2191bfa 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1ptq_1a6186e305f47c1d94b6130ef6c7f7e178.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1ptq_1a6186e305f47c1d94b6130ef6c7f7e178.html @@ -10,7 +10,7 @@ - Template Function torch_tensorrt::ptq::make_int8_calibrator — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Template Function torch_tensorrt::ptq::make_int8_calibrator — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a5b405fd3bf3c8fc2e2a54cbbab979797.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a5b405fd3bf3c8fc2e2a54cbbab979797.html index 433d2f3d28..7a611c9e68 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a5b405fd3bf3c8fc2e2a54cbbab979797.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a5b405fd3bf3c8fc2e2a54cbbab979797.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::torchscript::check_method_operator_support — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::torchscript::check_method_operator_support — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a6e19490a08fb1553c9dd347a5ae79db9.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a6e19490a08fb1553c9dd347a5ae79db9.html index 6e70ceb57a..95bb8b9f20 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a6e19490a08fb1553c9dd347a5ae79db9.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a6e19490a08fb1553c9dd347a5ae79db9.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::torchscript::compile — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::torchscript::compile — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a81f9783517335dda877d8cfcf38987c9.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a81f9783517335dda877d8cfcf38987c9.html index 4c4310b4b6..5596faa1e8 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a81f9783517335dda877d8cfcf38987c9.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1a81f9783517335dda877d8cfcf38987c9.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::torchscript::embed_engine_in_new_module — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::torchscript::embed_engine_in_new_module — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1ae8d56472106eeef37fbe51ff7f40c9b2.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1ae8d56472106eeef37fbe51ff7f40c9b2.html index 1a5e4bebe4..a77675787a 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1ae8d56472106eeef37fbe51ff7f40c9b2.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1_1torchscript_1ae8d56472106eeef37fbe51ff7f40c9b2.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::torchscript::convert_method_to_trt_engine — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::torchscript::convert_method_to_trt_engine — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1ac4ab8313ae72c2c899ea31548b528528.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1ac4ab8313ae72c2c899ea31548b528528.html index 8565fa51ee..88bd6c2ced 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1ac4ab8313ae72c2c899ea31548b528528.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1ac4ab8313ae72c2c899ea31548b528528.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::get_build_info — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::get_build_info — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1ad1acd06eaeaffbbcf6e7ebf426891384.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1ad1acd06eaeaffbbcf6e7ebf426891384.html index eaf8564f4b..a1ed2686c1 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1ad1acd06eaeaffbbcf6e7ebf426891384.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1ad1acd06eaeaffbbcf6e7ebf426891384.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::set_device — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::set_device — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/function_namespacetorch__tensorrt_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html b/docs/_cpp_api/function_namespacetorch__tensorrt_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html index 4ddaeda3af..82be1de9b6 100644 --- a/docs/_cpp_api/function_namespacetorch__tensorrt_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html +++ b/docs/_cpp_api/function_namespacetorch__tensorrt_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::dump_build_info — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Function torch_tensorrt::dump_build_info — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/namespace_torch_tensorrt.html b/docs/_cpp_api/namespace_torch_tensorrt.html index 861a306a04..96a0d5fdcc 100644 --- a/docs/_cpp_api/namespace_torch_tensorrt.html +++ b/docs/_cpp_api/namespace_torch_tensorrt.html @@ -10,7 +10,7 @@ - Namespace torch_tensorrt — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Namespace torch_tensorrt — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/namespace_torch_tensorrt__logging.html b/docs/_cpp_api/namespace_torch_tensorrt__logging.html index efc2b48795..34d0fe3a5a 100644 --- a/docs/_cpp_api/namespace_torch_tensorrt__logging.html +++ b/docs/_cpp_api/namespace_torch_tensorrt__logging.html @@ -10,7 +10,7 @@ - Namespace torch_tensorrt::logging — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Namespace torch_tensorrt::logging — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/namespace_torch_tensorrt__ptq.html b/docs/_cpp_api/namespace_torch_tensorrt__ptq.html index 663ea902c1..7527a31589 100644 --- a/docs/_cpp_api/namespace_torch_tensorrt__ptq.html +++ b/docs/_cpp_api/namespace_torch_tensorrt__ptq.html @@ -10,7 +10,7 @@ - Namespace torch_tensorrt::ptq — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Namespace torch_tensorrt::ptq — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/namespace_torch_tensorrt__torchscript.html b/docs/_cpp_api/namespace_torch_tensorrt__torchscript.html index 4cc3e88d83..674ebdbd62 100644 --- a/docs/_cpp_api/namespace_torch_tensorrt__torchscript.html +++ b/docs/_cpp_api/namespace_torch_tensorrt__torchscript.html @@ -10,7 +10,7 @@ - Namespace torch_tensorrt::torchscript — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Namespace torch_tensorrt::torchscript — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html index 8d3e2ebb26..c95192e7b3 100644 --- a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html +++ b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html @@ -10,7 +10,7 @@ - Program Listing for File logging.h — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Program Listing for File logging.h — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html index f7efab2132..4d26e6ee09 100644 --- a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html +++ b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html @@ -10,7 +10,7 @@ - Program Listing for File macros.h — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Program Listing for File macros.h — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html index 782faef333..8934a73239 100644 --- a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html +++ b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html @@ -10,7 +10,7 @@ - Program Listing for File ptq.h — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Program Listing for File ptq.h — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html index e025b9f969..d536fb5050 100644 --- a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html +++ b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html @@ -10,7 +10,7 @@ - Program Listing for File torch_tensorrt.h — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Program Listing for File torch_tensorrt.h — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/structtorch__tensorrt_1_1Device.html b/docs/_cpp_api/structtorch__tensorrt_1_1Device.html index 4c8e646b88..b0b5055f12 100644 --- a/docs/_cpp_api/structtorch__tensorrt_1_1Device.html +++ b/docs/_cpp_api/structtorch__tensorrt_1_1Device.html @@ -10,7 +10,7 @@ - Struct Device — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Struct Device — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html b/docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html index 988a84f3e1..e151e4ab5a 100644 --- a/docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html +++ b/docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html @@ -10,7 +10,7 @@ - Struct GraphInputs — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Struct GraphInputs — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/structtorch__tensorrt_1_1Input.html b/docs/_cpp_api/structtorch__tensorrt_1_1Input.html index b731fc8620..803d44a2bc 100644 --- a/docs/_cpp_api/structtorch__tensorrt_1_1Input.html +++ b/docs/_cpp_api/structtorch__tensorrt_1_1Input.html @@ -10,7 +10,7 @@ - Struct Input — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Struct Input — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html b/docs/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html index 86c7f63ed8..12201ef875 100644 --- a/docs/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html +++ b/docs/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html @@ -10,7 +10,7 @@ - Struct CompileSpec — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Struct CompileSpec — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/torch_tensort_cpp.html b/docs/_cpp_api/torch_tensort_cpp.html index c144491398..5cf53677b5 100644 --- a/docs/_cpp_api/torch_tensort_cpp.html +++ b/docs/_cpp_api/torch_tensort_cpp.html @@ -10,7 +10,7 @@ - Torch-TensorRT C++ API — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Torch-TensorRT C++ API — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_cpp_api/unabridged_orphan.html b/docs/_cpp_api/unabridged_orphan.html index 86ac7f3a92..8125ef370b 100644 --- a/docs/_cpp_api/unabridged_orphan.html +++ b/docs/_cpp_api/unabridged_orphan.html @@ -10,7 +10,7 @@ - Full API — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Full API — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

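Alongside the version and navigation changes above, this update adds a new "Deploy Quantized Models using Torch-TensorRT" example (vgg16_ptq, included below as both a notebook and a script). The workflow it demonstrates condenses to the sketch that follows. This is a minimal sketch, not a drop-in replacement for the example: the tiny stand-in model and synthetic calibration batches are placeholders for the example's VGG16 and CIFAR-10 loader, while the ModelOpt, torch.export, and torch_tensorrt calls mirror the ones used in the example itself.

import modelopt.torch.quantization as mtq
import torch
import torch_tensorrt as torchtrt
from modelopt.torch.quantization.utils import export_torch_mode
# The example uses this private entry point to work around a
# torch.export.export() FunctionalTensor error; see its inline comment below.
from torch.export._trace import _export

# Placeholder model and calibration data; the real example loads a pre-trained
# VGG16 checkpoint and a CIFAR-10 DataLoader instead.
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.Flatten(),
    torch.nn.Linear(8 * 32 * 32, 10),
).cuda().eval()
calibration_batches = [
    (torch.randn(8, 3, 32, 32), torch.zeros(8, dtype=torch.long)) for _ in range(4)
]

def calibrate_loop(m):
    # Feed representative data through the model so ModelOpt can collect
    # activation statistics for the quantizers it inserts.
    for data, _ in calibration_batches:
        m(data.cuda())

# In-place insertion of quantize/dequantize (Q/DQ) nodes. Swap in
# mtq.FP8_DEFAULT_CFG here and torch.float8_e4m3fn below for FP8 instead of INT8.
mtq.quantize(model, mtq.INT8_DEFAULT_CFG, forward_loop=calibrate_loop)

with torch.no_grad(), export_torch_mode():
    example_input = torch.randn(8, 3, 32, 32).cuda()  # CIFAR-10-shaped batch
    exp_program = _export(model, (example_input,))
    trt_model = torchtrt.dynamo.compile(
        exp_program,
        inputs=[example_input],
        enabled_precisions={torch.int8},
        min_block_size=1,
    )
    out = trt_model(example_input)  # run inference with the compiled engine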
    diff --git a/docs/_downloads/6a6052d9668b2cb8332d349d328e21c1/_rendered_examples_jupyter.zip b/docs/_downloads/6a6052d9668b2cb8332d349d328e21c1/_rendered_examples_jupyter.zip index 409477c2b9..9ebd46bac6 100644 Binary files a/docs/_downloads/6a6052d9668b2cb8332d349d328e21c1/_rendered_examples_jupyter.zip and b/docs/_downloads/6a6052d9668b2cb8332d349d328e21c1/_rendered_examples_jupyter.zip differ diff --git a/docs/_downloads/798cda8f83bd9f5e2cc93f329a04332c/_rendered_examples_python.zip b/docs/_downloads/798cda8f83bd9f5e2cc93f329a04332c/_rendered_examples_python.zip index 7d691ba5a6..43eba5e840 100644 Binary files a/docs/_downloads/798cda8f83bd9f5e2cc93f329a04332c/_rendered_examples_python.zip and b/docs/_downloads/798cda8f83bd9f5e2cc93f329a04332c/_rendered_examples_python.zip differ diff --git a/docs/_downloads/d606a9660cce1388933de8448182f4ee/vgg16_ptq.ipynb b/docs/_downloads/d606a9660cce1388933de8448182f4ee/vgg16_ptq.ipynb new file mode 100644 index 0000000000..d645d369b1 --- /dev/null +++ b/docs/_downloads/d606a9660cce1388933de8448182f4ee/vgg16_ptq.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n\n# Deploy Quantized Models using Torch-TensorRT\n\nHere we demonstrate how to deploy a model quantized to INT8 or FP8 using the Dynamo frontend of Torch-TensorRT\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports and Model Definition\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import argparse\n\nimport modelopt.torch.quantization as mtq\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch_tensorrt as torchtrt\nimport torchvision.datasets as datasets\nimport torchvision.transforms as transforms\nfrom modelopt.torch.quantization.utils import export_torch_mode\n\n\nclass VGG(nn.Module):\n def __init__(self, layer_spec, num_classes=1000, init_weights=False):\n super(VGG, self).__init__()\n\n layers = []\n in_channels = 3\n for l in layer_spec:\n if l == \"pool\":\n layers.append(nn.MaxPool2d(kernel_size=2, stride=2))\n else:\n layers += [\n nn.Conv2d(in_channels, l, kernel_size=3, padding=1),\n nn.BatchNorm2d(l),\n nn.ReLU(),\n ]\n in_channels = l\n\n self.features = nn.Sequential(*layers)\n self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n self.classifier = nn.Sequential(\n nn.Linear(512 * 1 * 1, 4096),\n nn.ReLU(),\n nn.Dropout(),\n nn.Linear(4096, 4096),\n nn.ReLU(),\n nn.Dropout(),\n nn.Linear(4096, num_classes),\n )\n if init_weights:\n self._initialize_weights()\n\n def _initialize_weights(self):\n for m in self.modules():\n if isinstance(m, nn.Conv2d):\n nn.init.kaiming_normal_(m.weight, mode=\"fan_out\", nonlinearity=\"relu\")\n if m.bias is not None:\n nn.init.constant_(m.bias, 0)\n elif isinstance(m, nn.BatchNorm2d):\n nn.init.constant_(m.weight, 1)\n nn.init.constant_(m.bias, 0)\n elif isinstance(m, nn.Linear):\n nn.init.normal_(m.weight, 0, 0.01)\n nn.init.constant_(m.bias, 0)\n\n def forward(self, x):\n x = self.features(x)\n x = self.avgpool(x)\n x = torch.flatten(x, 1)\n x = self.classifier(x)\n return x\n\n\ndef vgg16(num_classes=1000, init_weights=False):\n vgg16_cfg = [\n 64,\n 64,\n \"pool\",\n 128,\n 128,\n \"pool\",\n 256,\n 256,\n 256,\n \"pool\",\n 512,\n 512,\n 512,\n \"pool\",\n 512,\n 512,\n 512,\n \"pool\",\n ]\n return VGG(vgg16_cfg, num_classes, init_weights)\n\n\nPARSER = argparse.ArgumentParser(\n description=\"Load pre-trained VGG model and 
then tune with FP8 and PTQ. For having a pre-trained VGG model, please refer to https://github.com/pytorch/TensorRT/tree/main/examples/int8/training/vgg16\"\n)\nPARSER.add_argument(\n \"--ckpt\", type=str, required=True, help=\"Path to the pre-trained checkpoint\"\n)\nPARSER.add_argument(\n \"--batch-size\",\n default=128,\n type=int,\n help=\"Batch size for tuning the model with PTQ and FP8\",\n)\nPARSER.add_argument(\n \"--quantize-type\",\n default=\"int8\",\n type=str,\n help=\"quantization type, currently supported int8 or fp8 for PTQ\",\n)\nargs = PARSER.parse_args()\n\nmodel = vgg16(num_classes=10, init_weights=False)\nmodel = model.cuda()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the pre-trained model weights\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ckpt = torch.load(args.ckpt)\nweights = ckpt[\"model_state_dict\"]\n\nif torch.cuda.device_count() > 1:\n from collections import OrderedDict\n\n new_state_dict = OrderedDict()\n for k, v in weights.items():\n name = k[7:] # remove `module.`\n new_state_dict[name] = v\n weights = new_state_dict\n\nmodel.load_state_dict(weights)\n# Don't forget to set the model to evaluation mode!\nmodel.eval()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load training dataset and define loss function for PTQ\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "training_dataset = datasets.CIFAR10(\n root=\"./data\",\n train=True,\n download=True,\n transform=transforms.Compose(\n [\n transforms.RandomCrop(32, padding=4),\n transforms.RandomHorizontalFlip(),\n transforms.ToTensor(),\n transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n ]\n ),\n)\ntraining_dataloader = torch.utils.data.DataLoader(\n training_dataset,\n batch_size=args.batch_size,\n shuffle=True,\n num_workers=2,\n drop_last=True,\n)\n\ndata = iter(training_dataloader)\nimages, _ = next(data)\n\ncrit = nn.CrossEntropyLoss()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Calibration Loop for quantization\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def calibrate_loop(model):\n # calibrate over the training dataset\n total = 0\n correct = 0\n loss = 0.0\n for data, labels in training_dataloader:\n data, labels = data.cuda(), labels.cuda(non_blocking=True)\n out = model(data)\n loss += crit(out, labels)\n preds = torch.max(out, 1)[1]\n total += labels.size(0)\n correct += (preds == labels).sum().item()\n\n print(\"PTQ Loss: {:.5f} Acc: {:.2f}%\".format(loss / total, 100 * correct / total))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tune the pre-trained model with FP8 and PTQ\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "if args.quantize_type == \"int8\":\n quant_cfg = mtq.INT8_DEFAULT_CFG\nelif args.quantize_type == \"fp8\":\n quant_cfg = mtq.FP8_DEFAULT_CFG\n# PTQ with in-place replacement to quantized modules\nmtq.quantize(model, quant_cfg, forward_loop=calibrate_loop)\n# model has FP8 qdq nodes at this point" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inference\n\n" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Load the testing dataset\ntesting_dataset = datasets.CIFAR10(\n root=\"./data\",\n train=False,\n download=True,\n transform=transforms.Compose(\n [\n transforms.ToTensor(),\n transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n ]\n ),\n)\n\ntesting_dataloader = torch.utils.data.DataLoader(\n testing_dataset,\n batch_size=args.batch_size,\n shuffle=False,\n num_workers=2,\n drop_last=True,\n) # set drop_last=True to drop the last incomplete batch for static shape `torchtrt.dynamo.compile()`\n\nwith torch.no_grad():\n with export_torch_mode():\n # Compile the model with Torch-TensorRT Dynamo backend\n input_tensor = images.cuda()\n # torch.export.export() failed due to RuntimeError: Attempting to use FunctionalTensor on its own. Instead, please use it with a corresponding FunctionalTensorMode()\n from torch.export._trace import _export\n\n exp_program = _export(model, (input_tensor,))\n if args.quantize_type == \"int8\":\n enabled_precisions = {torch.int8}\n elif args.quantize_type == \"fp8\":\n enabled_precisions = {torch.float8_e4m3fn}\n trt_model = torchtrt.dynamo.compile(\n exp_program,\n inputs=[input_tensor],\n enabled_precisions=enabled_precisions,\n min_block_size=1,\n debug=False,\n )\n # You can also use torch compile path to compile the model with Torch-TensorRT:\n # trt_model = torch.compile(model, backend=\"tensorrt\")\n\n # Inference compiled Torch-TensorRT model over the testing dataset\n total = 0\n correct = 0\n loss = 0.0\n class_probs = []\n class_preds = []\n for data, labels in testing_dataloader:\n data, labels = data.cuda(), labels.cuda(non_blocking=True)\n out = trt_model(data)\n loss += crit(out, labels)\n preds = torch.max(out, 1)[1]\n class_probs.append([F.softmax(i, dim=0) for i in out])\n class_preds.append(preds)\n total += labels.size(0)\n correct += (preds == labels).sum().item()\n\n test_probs = torch.cat([torch.stack(batch) for batch in class_probs])\n test_preds = torch.cat(class_preds)\n test_loss = loss / total\n test_acc = correct / total\n print(\"Test Loss: {:.5f} Test Acc: {:.2f}%\".format(test_loss, 100 * test_acc))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/docs/_downloads/ef6d47fc0355ddff78547f419a7ddbf6/vgg16_ptq.py b/docs/_downloads/ef6d47fc0355ddff78547f419a7ddbf6/vgg16_ptq.py new file mode 100644 index 0000000000..4ca19e1fd0 --- /dev/null +++ b/docs/_downloads/ef6d47fc0355ddff78547f419a7ddbf6/vgg16_ptq.py @@ -0,0 +1,274 @@ +""" +.. 
_vgg16_ptq: + +Deploy Quantized Models using Torch-TensorRT +====================================================== + +Here we demonstrate how to deploy a model quantized to INT8 or FP8 using the Dynamo frontend of Torch-TensorRT +""" + +# %% +# Imports and Model Definition +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +import argparse + +import modelopt.torch.quantization as mtq +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch_tensorrt as torchtrt +import torchvision.datasets as datasets +import torchvision.transforms as transforms +from modelopt.torch.quantization.utils import export_torch_mode + + +class VGG(nn.Module): + def __init__(self, layer_spec, num_classes=1000, init_weights=False): + super(VGG, self).__init__() + + layers = [] + in_channels = 3 + for l in layer_spec: + if l == "pool": + layers.append(nn.MaxPool2d(kernel_size=2, stride=2)) + else: + layers += [ + nn.Conv2d(in_channels, l, kernel_size=3, padding=1), + nn.BatchNorm2d(l), + nn.ReLU(), + ] + in_channels = l + + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.classifier = nn.Sequential( + nn.Linear(512 * 1 * 1, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + if init_weights: + self._initialize_weights() + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.constant_(m.bias, 0) + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + +def vgg16(num_classes=1000, init_weights=False): + vgg16_cfg = [ + 64, + 64, + "pool", + 128, + 128, + "pool", + 256, + 256, + 256, + "pool", + 512, + 512, + 512, + "pool", + 512, + 512, + 512, + "pool", + ] + return VGG(vgg16_cfg, num_classes, init_weights) + + +PARSER = argparse.ArgumentParser( + description="Load pre-trained VGG model and then tune with FP8 and PTQ. For having a pre-trained VGG model, please refer to https://github.com/pytorch/TensorRT/tree/main/examples/int8/training/vgg16" +) +PARSER.add_argument( + "--ckpt", type=str, required=True, help="Path to the pre-trained checkpoint" +) +PARSER.add_argument( + "--batch-size", + default=128, + type=int, + help="Batch size for tuning the model with PTQ and FP8", +) +PARSER.add_argument( + "--quantize-type", + default="int8", + type=str, + help="quantization type, currently supported int8 or fp8 for PTQ", +) +args = PARSER.parse_args() + +model = vgg16(num_classes=10, init_weights=False) +model = model.cuda() + +# %% +# Load the pre-trained model weights +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +ckpt = torch.load(args.ckpt) +weights = ckpt["model_state_dict"] + +if torch.cuda.device_count() > 1: + from collections import OrderedDict + + new_state_dict = OrderedDict() + for k, v in weights.items(): + name = k[7:] # remove `module.` + new_state_dict[name] = v + weights = new_state_dict + +model.load_state_dict(weights) +# Don't forget to set the model to evaluation mode! 
+model.eval() + +# %% +# Load training dataset and define loss function for PTQ +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +training_dataset = datasets.CIFAR10( + root="./data", + train=True, + download=True, + transform=transforms.Compose( + [ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ] + ), +) +training_dataloader = torch.utils.data.DataLoader( + training_dataset, + batch_size=args.batch_size, + shuffle=True, + num_workers=2, + drop_last=True, +) + +data = iter(training_dataloader) +images, _ = next(data) + +crit = nn.CrossEntropyLoss() + +# %% +# Define Calibration Loop for quantization +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +def calibrate_loop(model): + # calibrate over the training dataset + total = 0 + correct = 0 + loss = 0.0 + for data, labels in training_dataloader: + data, labels = data.cuda(), labels.cuda(non_blocking=True) + out = model(data) + loss += crit(out, labels) + preds = torch.max(out, 1)[1] + total += labels.size(0) + correct += (preds == labels).sum().item() + + print("PTQ Loss: {:.5f} Acc: {:.2f}%".format(loss / total, 100 * correct / total)) + + +# %% +# Tune the pre-trained model with FP8 and PTQ +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +if args.quantize_type == "int8": + quant_cfg = mtq.INT8_DEFAULT_CFG +elif args.quantize_type == "fp8": + quant_cfg = mtq.FP8_DEFAULT_CFG +# PTQ with in-place replacement to quantized modules +mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop) +# model has FP8 qdq nodes at this point + +# %% +# Inference +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +# Load the testing dataset +testing_dataset = datasets.CIFAR10( + root="./data", + train=False, + download=True, + transform=transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ] + ), +) + +testing_dataloader = torch.utils.data.DataLoader( + testing_dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=2, + drop_last=True, +) # set drop_last=True to drop the last incomplete batch for static shape `torchtrt.dynamo.compile()` + +with torch.no_grad(): + with export_torch_mode(): + # Compile the model with Torch-TensorRT Dynamo backend + input_tensor = images.cuda() + # torch.export.export() failed due to RuntimeError: Attempting to use FunctionalTensor on its own. 
Instead, please use it with a corresponding FunctionalTensorMode() + from torch.export._trace import _export + + exp_program = _export(model, (input_tensor,)) + if args.quantize_type == "int8": + enabled_precisions = {torch.int8} + elif args.quantize_type == "fp8": + enabled_precisions = {torch.float8_e4m3fn} + trt_model = torchtrt.dynamo.compile( + exp_program, + inputs=[input_tensor], + enabled_precisions=enabled_precisions, + min_block_size=1, + debug=False, + ) + # You can also use torch compile path to compile the model with Torch-TensorRT: + # trt_model = torch.compile(model, backend="tensorrt") + + # Inference compiled Torch-TensorRT model over the testing dataset + total = 0 + correct = 0 + loss = 0.0 + class_probs = [] + class_preds = [] + for data, labels in testing_dataloader: + data, labels = data.cuda(), labels.cuda(non_blocking=True) + out = trt_model(data) + loss += crit(out, labels) + preds = torch.max(out, 1)[1] + class_probs.append([F.softmax(i, dim=0) for i in out]) + class_preds.append(preds) + total += labels.size(0) + correct += (preds == labels).sum().item() + + test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) + test_preds = torch.cat(class_preds) + test_loss = loss / total + test_acc = correct / total + print("Test Loss: {:.5f} Test Acc: {:.2f}%".format(test_loss, 100 * test_acc)) diff --git a/docs/_images/sphx_glr_vgg16_ptq_thumb.png b/docs/_images/sphx_glr_vgg16_ptq_thumb.png new file mode 100644 index 0000000000..8a5fed589d Binary files /dev/null and b/docs/_images/sphx_glr_vgg16_ptq_thumb.png differ diff --git a/docs/_modules/index.html b/docs/_modules/index.html index c3806953e3..2cf3f16ce4 100644 --- a/docs/_modules/index.html +++ b/docs/_modules/index.html @@ -9,7 +9,7 @@ - Overview: module code — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Overview: module code — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/_Device.html b/docs/_modules/torch_tensorrt/_Device.html index b324d0277a..96eeeca9d5 100644 --- a/docs/_modules/torch_tensorrt/_Device.html +++ b/docs/_modules/torch_tensorrt/_Device.html @@ -9,7 +9,7 @@ - torch_tensorrt._Device — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt._Device — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/_Input.html b/docs/_modules/torch_tensorrt/_Input.html index 29f0274dd4..c30abf2797 100644 --- a/docs/_modules/torch_tensorrt/_Input.html +++ b/docs/_modules/torch_tensorrt/_Input.html @@ -9,7 +9,7 @@ - torch_tensorrt._Input — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt._Input — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/_compile.html b/docs/_modules/torch_tensorrt/_compile.html index 1dbb59bfa0..cd403e1bbb 100644 --- a/docs/_modules/torch_tensorrt/_compile.html +++ b/docs/_modules/torch_tensorrt/_compile.html @@ -9,7 +9,7 @@ - torch_tensorrt._compile — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt._compile — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/_enums.html b/docs/_modules/torch_tensorrt/_enums.html index 2b612cf358..153d62a6f4 100644 --- a/docs/_modules/torch_tensorrt/_enums.html +++ b/docs/_modules/torch_tensorrt/_enums.html @@ -9,7 +9,7 @@ - torch_tensorrt._enums — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt._enums — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    @@ -460,11 +458,10 @@

    Source code for torch_tensorrt._enums

     from typing import Any, Optional, Type, Union
     
     import numpy as np
    +import tensorrt as trt
     import torch
     from torch_tensorrt._features import ENABLED_FEATURES, needs_torch_tensorrt_runtime
     
    -import tensorrt as trt
    -
     
     
    [docs]class dtype(Enum): """Enum to describe data types to Torch-TensorRT, has compatibility with torch, tensorrt and numpy dtypes""" diff --git a/docs/_modules/torch_tensorrt/dynamo/_compiler.html b/docs/_modules/torch_tensorrt/dynamo/_compiler.html index 7965144fd2..76e9f99b56 100644 --- a/docs/_modules/torch_tensorrt/dynamo/_compiler.html +++ b/docs/_modules/torch_tensorrt/dynamo/_compiler.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo._compiler — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.dynamo._compiler — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/dynamo/_exporter.html b/docs/_modules/torch_tensorrt/dynamo/_exporter.html index e4878a9aa2..7beef6382d 100644 --- a/docs/_modules/torch_tensorrt/dynamo/_exporter.html +++ b/docs/_modules/torch_tensorrt/dynamo/_exporter.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo._exporter — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.dynamo._exporter — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/dynamo/_refit.html b/docs/_modules/torch_tensorrt/dynamo/_refit.html index 350fa6e066..154ae0777e 100644 --- a/docs/_modules/torch_tensorrt/dynamo/_refit.html +++ b/docs/_modules/torch_tensorrt/dynamo/_refit.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo._refit — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.dynamo._refit — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/dynamo/_settings.html b/docs/_modules/torch_tensorrt/dynamo/_settings.html index aed75f1aa5..62a8669834 100644 --- a/docs/_modules/torch_tensorrt/dynamo/_settings.html +++ b/docs/_modules/torch_tensorrt/dynamo/_settings.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo._settings — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.dynamo._settings — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/dynamo/_tracer.html b/docs/_modules/torch_tensorrt/dynamo/_tracer.html index 10ca55f9da..aeda025161 100644 --- a/docs/_modules/torch_tensorrt/dynamo/_tracer.html +++ b/docs/_modules/torch_tensorrt/dynamo/_tracer.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo._tracer — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.dynamo._tracer — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.html b/docs/_modules/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.html index 13fcd7d665..77ebd98cd9 100644 --- a/docs/_modules/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.html +++ b/docs/_modules/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo.runtime._MutableTorchTensorRTModule — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.dynamo.runtime._MutableTorchTensorRTModule — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.html b/docs/_modules/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.html index 113c2314a2..6b2afa2855 100644 --- a/docs/_modules/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.html +++ b/docs/_modules/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModule — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModule — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.html b/docs/_modules/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.html index f14e2e6d3e..4023689859 100644 --- a/docs/_modules/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.html +++ b/docs/_modules/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo.runtime._TorchTensorRTModule — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.dynamo.runtime._TorchTensorRTModule — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/fx/fx2trt.html b/docs/_modules/torch_tensorrt/fx/fx2trt.html index 966c5eaf66..339dc1ced7 100644 --- a/docs/_modules/torch_tensorrt/fx/fx2trt.html +++ b/docs/_modules/torch_tensorrt/fx/fx2trt.html @@ -9,7 +9,7 @@ - torch_tensorrt.fx.fx2trt — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.fx.fx2trt — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/fx/input_tensor_spec.html b/docs/_modules/torch_tensorrt/fx/input_tensor_spec.html index 5097ed4319..b7e4537697 100644 --- a/docs/_modules/torch_tensorrt/fx/input_tensor_spec.html +++ b/docs/_modules/torch_tensorrt/fx/input_tensor_spec.html @@ -9,7 +9,7 @@ - torch_tensorrt.fx.input_tensor_spec — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.fx.input_tensor_spec — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/fx/lower.html b/docs/_modules/torch_tensorrt/fx/lower.html index 08635c03fd..8cb7a715ec 100644 --- a/docs/_modules/torch_tensorrt/fx/lower.html +++ b/docs/_modules/torch_tensorrt/fx/lower.html @@ -9,7 +9,7 @@ - torch_tensorrt.fx.lower — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.fx.lower — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/fx/trt_module.html b/docs/_modules/torch_tensorrt/fx/trt_module.html index aa45ea92b6..2e084c0d4a 100644 --- a/docs/_modules/torch_tensorrt/fx/trt_module.html +++ b/docs/_modules/torch_tensorrt/fx/trt_module.html @@ -9,7 +9,7 @@ - torch_tensorrt.fx.trt_module — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.fx.trt_module — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/logging.html b/docs/_modules/torch_tensorrt/logging.html index b64b25ee35..7865658ab3 100644 --- a/docs/_modules/torch_tensorrt/logging.html +++ b/docs/_modules/torch_tensorrt/logging.html @@ -9,7 +9,7 @@ - torch_tensorrt.logging — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.logging — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/runtime/_multi_device_safe_mode.html b/docs/_modules/torch_tensorrt/runtime/_multi_device_safe_mode.html index a16b9bba12..8752249250 100644 --- a/docs/_modules/torch_tensorrt/runtime/_multi_device_safe_mode.html +++ b/docs/_modules/torch_tensorrt/runtime/_multi_device_safe_mode.html @@ -9,7 +9,7 @@ - torch_tensorrt.runtime._multi_device_safe_mode — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.runtime._multi_device_safe_mode — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/ts/_compile_spec.html b/docs/_modules/torch_tensorrt/ts/_compile_spec.html index 3aee510f3e..ec664db21c 100644 --- a/docs/_modules/torch_tensorrt/ts/_compile_spec.html +++ b/docs/_modules/torch_tensorrt/ts/_compile_spec.html @@ -9,7 +9,7 @@ - torch_tensorrt.ts._compile_spec — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.ts._compile_spec — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/ts/_compiler.html b/docs/_modules/torch_tensorrt/ts/_compiler.html index 0916ff4aef..e3c9ceaeea 100644 --- a/docs/_modules/torch_tensorrt/ts/_compiler.html +++ b/docs/_modules/torch_tensorrt/ts/_compiler.html @@ -9,7 +9,7 @@ - torch_tensorrt.ts._compiler — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.ts._compiler — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_modules/torch_tensorrt/ts/ptq.html b/docs/_modules/torch_tensorrt/ts/ptq.html index 39bfe6bd87..8f84d3f452 100644 --- a/docs/_modules/torch_tensorrt/ts/ptq.html +++ b/docs/_modules/torch_tensorrt/ts/ptq.html @@ -9,7 +9,7 @@ - torch_tensorrt.ts.ptq — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torch_tensorrt.ts.ptq — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -272,7 +272,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -313,7 +313,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/_sources/tutorials/_rendered_examples/dynamo/index.rst.txt b/docs/_sources/tutorials/_rendered_examples/dynamo/index.rst.txt index 9dfdff54f2..6e5917ae7b 100644 --- a/docs/_sources/tutorials/_rendered_examples/dynamo/index.rst.txt +++ b/docs/_sources/tutorials/_rendered_examples/dynamo/index.rst.txt @@ -148,14 +148,14 @@ a number of ways you can leverage this backend to accelerate inference. .. raw:: html -
    +
    .. only:: html - .. image:: /tutorials/_rendered_examples/dynamo/images/thumb/sphx_glr_vgg16_fp8_ptq_thumb.png + .. image:: /tutorials/_rendered_examples/dynamo/images/thumb/sphx_glr_vgg16_ptq_thumb.png :alt: - :ref:`sphx_glr_tutorials__rendered_examples_dynamo_vgg16_fp8_ptq.py` + :ref:`sphx_glr_tutorials__rendered_examples_dynamo_vgg16_ptq.py` .. raw:: html @@ -195,6 +195,6 @@ a number of ways you can leverage this backend to accelerate inference. /tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage /tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example /tutorials/_rendered_examples/dynamo/torch_compile_resnet_example - /tutorials/_rendered_examples/dynamo/vgg16_fp8_ptq + /tutorials/_rendered_examples/dynamo/vgg16_ptq /tutorials/_rendered_examples/dynamo/custom_kernel_plugins diff --git a/docs/_sources/tutorials/_rendered_examples/dynamo/vgg16_ptq.rst.txt b/docs/_sources/tutorials/_rendered_examples/dynamo/vgg16_ptq.rst.txt new file mode 100644 index 0000000000..3224e138ca --- /dev/null +++ b/docs/_sources/tutorials/_rendered_examples/dynamo/vgg16_ptq.rst.txt @@ -0,0 +1,363 @@ + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "tutorials/_rendered_examples/dynamo/vgg16_ptq.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + :ref:`Go to the end ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_tutorials__rendered_examples_dynamo_vgg16_ptq.py: + + +.. _vgg16_ptq: + +Deploy Quantized Models using Torch-TensorRT +====================================================== + +Here we demonstrate how to deploy a model quantized to INT8 or FP8 using the Dynamo frontend of Torch-TensorRT + +.. GENERATED FROM PYTHON SOURCE LINES 11-13 + +Imports and Model Definition +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. GENERATED FROM PYTHON SOURCE LINES 13-125 + +.. 
code-block:: python + + + import argparse + + import modelopt.torch.quantization as mtq + import torch + import torch.nn as nn + import torch.nn.functional as F + import torch_tensorrt as torchtrt + import torchvision.datasets as datasets + import torchvision.transforms as transforms + from modelopt.torch.quantization.utils import export_torch_mode + + + class VGG(nn.Module): + def __init__(self, layer_spec, num_classes=1000, init_weights=False): + super(VGG, self).__init__() + + layers = [] + in_channels = 3 + for l in layer_spec: + if l == "pool": + layers.append(nn.MaxPool2d(kernel_size=2, stride=2)) + else: + layers += [ + nn.Conv2d(in_channels, l, kernel_size=3, padding=1), + nn.BatchNorm2d(l), + nn.ReLU(), + ] + in_channels = l + + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.classifier = nn.Sequential( + nn.Linear(512 * 1 * 1, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + if init_weights: + self._initialize_weights() + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.constant_(m.bias, 0) + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + + def vgg16(num_classes=1000, init_weights=False): + vgg16_cfg = [ + 64, + 64, + "pool", + 128, + 128, + "pool", + 256, + 256, + 256, + "pool", + 512, + 512, + 512, + "pool", + 512, + 512, + 512, + "pool", + ] + return VGG(vgg16_cfg, num_classes, init_weights) + + + PARSER = argparse.ArgumentParser( + description="Load pre-trained VGG model and then tune with FP8 and PTQ. For having a pre-trained VGG model, please refer to https://github.com/pytorch/TensorRT/tree/main/examples/int8/training/vgg16" + ) + PARSER.add_argument( + "--ckpt", type=str, required=True, help="Path to the pre-trained checkpoint" + ) + PARSER.add_argument( + "--batch-size", + default=128, + type=int, + help="Batch size for tuning the model with PTQ and FP8", + ) + PARSER.add_argument( + "--quantize-type", + default="int8", + type=str, + help="quantization type, currently supported int8 or fp8 for PTQ", + ) + args = PARSER.parse_args() + + model = vgg16(num_classes=10, init_weights=False) + model = model.cuda() + + +.. GENERATED FROM PYTHON SOURCE LINES 126-128 + +Load the pre-trained model weights +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. GENERATED FROM PYTHON SOURCE LINES 128-145 + +.. code-block:: python + + + ckpt = torch.load(args.ckpt) + weights = ckpt["model_state_dict"] + + if torch.cuda.device_count() > 1: + from collections import OrderedDict + + new_state_dict = OrderedDict() + for k, v in weights.items(): + name = k[7:] # remove `module.` + new_state_dict[name] = v + weights = new_state_dict + + model.load_state_dict(weights) + # Don't forget to set the model to evaluation mode! + model.eval() + + +.. GENERATED FROM PYTHON SOURCE LINES 146-148 + +Load training dataset and define loss function for PTQ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. GENERATED FROM PYTHON SOURCE LINES 148-175 + +.. 
code-block:: python + + + training_dataset = datasets.CIFAR10( + root="./data", + train=True, + download=True, + transform=transforms.Compose( + [ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ] + ), + ) + training_dataloader = torch.utils.data.DataLoader( + training_dataset, + batch_size=args.batch_size, + shuffle=True, + num_workers=2, + drop_last=True, + ) + + data = iter(training_dataloader) + images, _ = next(data) + + crit = nn.CrossEntropyLoss() + + +.. GENERATED FROM PYTHON SOURCE LINES 176-178 + +Define Calibration Loop for quantization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. GENERATED FROM PYTHON SOURCE LINES 178-196 + +.. code-block:: python + + + + def calibrate_loop(model): + # calibrate over the training dataset + total = 0 + correct = 0 + loss = 0.0 + for data, labels in training_dataloader: + data, labels = data.cuda(), labels.cuda(non_blocking=True) + out = model(data) + loss += crit(out, labels) + preds = torch.max(out, 1)[1] + total += labels.size(0) + correct += (preds == labels).sum().item() + + print("PTQ Loss: {:.5f} Acc: {:.2f}%".format(loss / total, 100 * correct / total)) + + + +.. GENERATED FROM PYTHON SOURCE LINES 197-199 + +Tune the pre-trained model with FP8 and PTQ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. GENERATED FROM PYTHON SOURCE LINES 199-207 + +.. code-block:: python + + if args.quantize_type == "int8": + quant_cfg = mtq.INT8_DEFAULT_CFG + elif args.quantize_type == "fp8": + quant_cfg = mtq.FP8_DEFAULT_CFG + # PTQ with in-place replacement to quantized modules + mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop) + # model has FP8 qdq nodes at this point + + +.. GENERATED FROM PYTHON SOURCE LINES 208-210 + +Inference +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. GENERATED FROM PYTHON SOURCE LINES 210-275 + +.. code-block:: python + + + # Load the testing dataset + testing_dataset = datasets.CIFAR10( + root="./data", + train=False, + download=True, + transform=transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ] + ), + ) + + testing_dataloader = torch.utils.data.DataLoader( + testing_dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=2, + drop_last=True, + ) # set drop_last=True to drop the last incomplete batch for static shape `torchtrt.dynamo.compile()` + + with torch.no_grad(): + with export_torch_mode(): + # Compile the model with Torch-TensorRT Dynamo backend + input_tensor = images.cuda() + # torch.export.export() failed due to RuntimeError: Attempting to use FunctionalTensor on its own. 
Instead, please use it with a corresponding FunctionalTensorMode() + from torch.export._trace import _export + + exp_program = _export(model, (input_tensor,)) + if args.quantize_type == "int8": + enabled_precisions = {torch.int8} + elif args.quantize_type == "fp8": + enabled_precisions = {torch.float8_e4m3fn} + trt_model = torchtrt.dynamo.compile( + exp_program, + inputs=[input_tensor], + enabled_precisions=enabled_precisions, + min_block_size=1, + debug=False, + ) + # You can also use torch compile path to compile the model with Torch-TensorRT: + # trt_model = torch.compile(model, backend="tensorrt") + + # Inference compiled Torch-TensorRT model over the testing dataset + total = 0 + correct = 0 + loss = 0.0 + class_probs = [] + class_preds = [] + for data, labels in testing_dataloader: + data, labels = data.cuda(), labels.cuda(non_blocking=True) + out = trt_model(data) + loss += crit(out, labels) + preds = torch.max(out, 1)[1] + class_probs.append([F.softmax(i, dim=0) for i in out]) + class_preds.append(preds) + total += labels.size(0) + correct += (preds == labels).sum().item() + + test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) + test_preds = torch.cat(class_preds) + test_loss = loss / total + test_acc = correct / total + print("Test Loss: {:.5f} Test Acc: {:.2f}%".format(test_loss, 100 * test_acc)) + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** ( 0 minutes 0.000 seconds) + + +.. _sphx_glr_download_tutorials__rendered_examples_dynamo_vgg16_ptq.py: + +.. only:: html + + .. container:: sphx-glr-footer sphx-glr-footer-example + + + + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: vgg16_ptq.py ` + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: vgg16_ptq.ipynb ` + + +.. only:: html + + .. rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery `_ diff --git a/docs/_sources/tutorials/_rendered_examples/index.rst.txt b/docs/_sources/tutorials/_rendered_examples/index.rst.txt index 70947d5651..f68c1fb417 100644 --- a/docs/_sources/tutorials/_rendered_examples/index.rst.txt +++ b/docs/_sources/tutorials/_rendered_examples/index.rst.txt @@ -164,14 +164,14 @@ a number of ways you can leverage this backend to accelerate inference. .. raw:: html -
    +
    .. only:: html - .. image:: /tutorials/_rendered_examples/dynamo/images/thumb/sphx_glr_vgg16_fp8_ptq_thumb.png + .. image:: /tutorials/_rendered_examples/dynamo/images/thumb/sphx_glr_vgg16_ptq_thumb.png :alt: - :ref:`sphx_glr_tutorials__rendered_examples_dynamo_vgg16_fp8_ptq.py` + :ref:`sphx_glr_tutorials__rendered_examples_dynamo_vgg16_ptq.py` .. raw:: html diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js index 6736d92864..c01a50945c 100644 --- a/docs/_static/documentation_options.js +++ b/docs/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: 'v2.5.0.dev0+60ec67b', + VERSION: 'v2.5.0.dev0+b3a8cdd', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/cli/torchtrtc.html b/docs/cli/torchtrtc.html index 9be5497d7a..f6792e2ae9 100644 --- a/docs/cli/torchtrtc.html +++ b/docs/cli/torchtrtc.html @@ -10,7 +10,7 @@ - torchtrtc — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + torchtrtc — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/conversion.html b/docs/contributors/conversion.html index be7c511e70..ffb238bfcd 100644 --- a/docs/contributors/conversion.html +++ b/docs/contributors/conversion.html @@ -10,7 +10,7 @@ - Conversion Phase — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Conversion Phase — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/dynamo_converters.html b/docs/contributors/dynamo_converters.html index d409fad14e..55f9686749 100644 --- a/docs/contributors/dynamo_converters.html +++ b/docs/contributors/dynamo_converters.html @@ -10,7 +10,7 @@ - Writing Dynamo Converters — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Writing Dynamo Converters — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/lowering.html b/docs/contributors/lowering.html index 86a5049293..76d2a22dc7 100644 --- a/docs/contributors/lowering.html +++ b/docs/contributors/lowering.html @@ -10,7 +10,7 @@ - Lowering Phase — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Lowering Phase — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/partitioning.html b/docs/contributors/partitioning.html index 7491719997..a06f2621ad 100644 --- a/docs/contributors/partitioning.html +++ b/docs/contributors/partitioning.html @@ -10,7 +10,7 @@ - Partitioning Phase — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Partitioning Phase — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/phases.html b/docs/contributors/phases.html index edba230bb2..9bf25a6e07 100644 --- a/docs/contributors/phases.html +++ b/docs/contributors/phases.html @@ -10,7 +10,7 @@ - Compiler Phases — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Compiler Phases — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/runtime.html b/docs/contributors/runtime.html index 27b6f02446..861ff004c3 100644 --- a/docs/contributors/runtime.html +++ b/docs/contributors/runtime.html @@ -10,7 +10,7 @@ - Runtime Phase — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Runtime Phase — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/system_overview.html b/docs/contributors/system_overview.html index a6cb038f1b..97fa2c3423 100644 --- a/docs/contributors/system_overview.html +++ b/docs/contributors/system_overview.html @@ -10,7 +10,7 @@ - System Overview — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + System Overview — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/ts_converters.html b/docs/contributors/ts_converters.html index 462038e8e4..69ddafa051 100644 --- a/docs/contributors/ts_converters.html +++ b/docs/contributors/ts_converters.html @@ -10,7 +10,7 @@ - Writing TorchScript Converters — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Writing TorchScript Converters — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/useful_links.html b/docs/contributors/useful_links.html index 37d950ff17..9fba4ea080 100644 --- a/docs/contributors/useful_links.html +++ b/docs/contributors/useful_links.html @@ -10,7 +10,7 @@ - Useful Links for Torch-TensorRT Development — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Useful Links for Torch-TensorRT Development — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/contributors/writing_dynamo_aten_lowering_passes.html b/docs/contributors/writing_dynamo_aten_lowering_passes.html index fef7270d1c..a4342f98de 100644 --- a/docs/contributors/writing_dynamo_aten_lowering_passes.html +++ b/docs/contributors/writing_dynamo_aten_lowering_passes.html @@ -10,7 +10,7 @@ - Writing Dynamo ATen Lowering Passes — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Writing Dynamo ATen Lowering Passes — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/dynamo/dynamo_export.html b/docs/dynamo/dynamo_export.html index b3609ac9dc..f2e80b40f3 100644 --- a/docs/dynamo/dynamo_export.html +++ b/docs/dynamo/dynamo_export.html @@ -10,7 +10,7 @@ - Compiling Exported Programs with Torch-TensorRT — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Compiling Exported Programs with Torch-TensorRT — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/dynamo/torch_compile.html b/docs/dynamo/torch_compile.html index fecb707f2a..a0531ea32e 100644 --- a/docs/dynamo/torch_compile.html +++ b/docs/dynamo/torch_compile.html @@ -10,7 +10,7 @@ - TensorRT Backend for torch.compile — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + TensorRT Backend for torch.compile — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -40,7 +40,7 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    +
    + + + + + +
    +
    +
    + + + + + + + + + + + +
    +
    +
    + + + + + + + + + + + + + + + + +
    + +
      + +
    • + + + Docs + + > +
    • + + +
    • Deploy Quantized Models using Torch-TensorRT
    • + + +
    • + + + + + +
    • + +
    + + +
    +
    + +
    + Shortcuts +
    +
    + +
    +
    + + + + + + +
    + +
    +
    + + +
    +

    Deploy Quantized Models using Torch-TensorRT

    +

    Here we demonstrate how to deploy a model quantized to INT8 or FP8 using the Dynamo frontend of Torch-TensorRT

    +
    +

    Imports and Model Definition

    +
    import argparse
    +
    +import modelopt.torch.quantization as mtq
    +import torch
    +import torch.nn as nn
    +import torch.nn.functional as F
    +import torch_tensorrt as torchtrt
    +import torchvision.datasets as datasets
    +import torchvision.transforms as transforms
    +from modelopt.torch.quantization.utils import export_torch_mode
    +
    +
    +class VGG(nn.Module):
    +    def __init__(self, layer_spec, num_classes=1000, init_weights=False):
    +        super(VGG, self).__init__()
    +
    +        layers = []
    +        in_channels = 3
    +        for l in layer_spec:
    +            if l == "pool":
    +                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    +            else:
    +                layers += [
    +                    nn.Conv2d(in_channels, l, kernel_size=3, padding=1),
    +                    nn.BatchNorm2d(l),
    +                    nn.ReLU(),
    +                ]
    +                in_channels = l
    +
    +        self.features = nn.Sequential(*layers)
    +        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    +        self.classifier = nn.Sequential(
    +            nn.Linear(512 * 1 * 1, 4096),
    +            nn.ReLU(),
    +            nn.Dropout(),
    +            nn.Linear(4096, 4096),
    +            nn.ReLU(),
    +            nn.Dropout(),
    +            nn.Linear(4096, num_classes),
    +        )
    +        if init_weights:
    +            self._initialize_weights()
    +
    +    def _initialize_weights(self):
    +        for m in self.modules():
    +            if isinstance(m, nn.Conv2d):
    +                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
    +                if m.bias is not None:
    +                    nn.init.constant_(m.bias, 0)
    +            elif isinstance(m, nn.BatchNorm2d):
    +                nn.init.constant_(m.weight, 1)
    +                nn.init.constant_(m.bias, 0)
    +            elif isinstance(m, nn.Linear):
    +                nn.init.normal_(m.weight, 0, 0.01)
    +                nn.init.constant_(m.bias, 0)
    +
    +    def forward(self, x):
    +        x = self.features(x)
    +        x = self.avgpool(x)
    +        x = torch.flatten(x, 1)
    +        x = self.classifier(x)
    +        return x
    +
    +
    +def vgg16(num_classes=1000, init_weights=False):
    +    vgg16_cfg = [
    +        64,
    +        64,
    +        "pool",
    +        128,
    +        128,
    +        "pool",
    +        256,
    +        256,
    +        256,
    +        "pool",
    +        512,
    +        512,
    +        512,
    +        "pool",
    +        512,
    +        512,
    +        512,
    +        "pool",
    +    ]
    +    return VGG(vgg16_cfg, num_classes, init_weights)
    +
    +
    +PARSER = argparse.ArgumentParser(
    +    description="Load pre-trained VGG model and then tune with FP8 and PTQ. For having a pre-trained VGG model, please refer to https://github.com/pytorch/TensorRT/tree/main/examples/int8/training/vgg16"
    +)
    +PARSER.add_argument(
    +    "--ckpt", type=str, required=True, help="Path to the pre-trained checkpoint"
    +)
    +PARSER.add_argument(
    +    "--batch-size",
    +    default=128,
    +    type=int,
    +    help="Batch size for tuning the model with PTQ and FP8",
    +)
    +PARSER.add_argument(
    +    "--quantize-type",
    +    default="int8",
    +    type=str,
    +    help="quantization type, currently supported int8 or fp8 for PTQ",
    +)
    +args = PARSER.parse_args()
    +
    +model = vgg16(num_classes=10, init_weights=False)
    +model = model.cuda()
    +
    +
    +
    +
    +

    Load the pre-trained model weights

    +
    ckpt = torch.load(args.ckpt)
    +weights = ckpt["model_state_dict"]
    +
    +if torch.cuda.device_count() > 1:
    +    from collections import OrderedDict
    +
    +    new_state_dict = OrderedDict()
    +    for k, v in weights.items():
    +        name = k[7:]  # remove `module.`
    +        new_state_dict[name] = v
    +    weights = new_state_dict
    +
    +model.load_state_dict(weights)
    +# Don't forget to set the model to evaluation mode!
    +model.eval()
    +
    +
    +
    +
    +

    Load training dataset and define loss function for PTQ

    +
    training_dataset = datasets.CIFAR10(
    +    root="./data",
    +    train=True,
    +    download=True,
    +    transform=transforms.Compose(
    +        [
    +            transforms.RandomCrop(32, padding=4),
    +            transforms.RandomHorizontalFlip(),
    +            transforms.ToTensor(),
    +            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    +        ]
    +    ),
    +)
    +training_dataloader = torch.utils.data.DataLoader(
    +    training_dataset,
    +    batch_size=args.batch_size,
    +    shuffle=True,
    +    num_workers=2,
    +    drop_last=True,
    +)
    +
    +data = iter(training_dataloader)
    +images, _ = next(data)
    +
    +crit = nn.CrossEntropyLoss()
    +
    +
    +
    +
    +

    Define Calibration Loop for quantization

    +
    def calibrate_loop(model):
    +    # calibrate over the training dataset
    +    total = 0
    +    correct = 0
    +    loss = 0.0
    +    for data, labels in training_dataloader:
    +        data, labels = data.cuda(), labels.cuda(non_blocking=True)
    +        out = model(data)
    +        loss += crit(out, labels)
    +        preds = torch.max(out, 1)[1]
    +        total += labels.size(0)
    +        correct += (preds == labels).sum().item()
    +
    +    print("PTQ Loss: {:.5f} Acc: {:.2f}%".format(loss / total, 100 * correct / total))
    +
    +
    +
    +
    +

    Tune the pre-trained model with FP8 and PTQ

    +
    if args.quantize_type == "int8":
    +    quant_cfg = mtq.INT8_DEFAULT_CFG
    +elif args.quantize_type == "fp8":
    +    quant_cfg = mtq.FP8_DEFAULT_CFG
+# PTQ with in-place replacement of modules by their quantized counterparts
    +mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop)
+# The model now contains INT8 or FP8 Q/DQ nodes, depending on --quantize-type
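+# Hedged, optional addition (not part of the generated example): print a summary of the
+# quantizers that modelopt inserted before compiling; assumes print_quant_summary is
+# available in the installed modelopt version.
+if hasattr(mtq, "print_quant_summary"):
+    mtq.print_quant_summary(model)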
    +
    +
    +
    +
    +

    Inference

    +
    # Load the testing dataset
    +testing_dataset = datasets.CIFAR10(
    +    root="./data",
    +    train=False,
    +    download=True,
    +    transform=transforms.Compose(
    +        [
    +            transforms.ToTensor(),
    +            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    +        ]
    +    ),
    +)
    +
    +testing_dataloader = torch.utils.data.DataLoader(
    +    testing_dataset,
    +    batch_size=args.batch_size,
    +    shuffle=False,
    +    num_workers=2,
    +    drop_last=True,
+)  # drop_last=True drops the last incomplete batch, since the static-shape engine built by `torchtrt.dynamo.compile()` below expects a fixed batch size
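+# Hedged, illustrative note (not part of the generated example): a dynamic-shape build
+# could keep the last partial batch by describing the batch dimension with torchtrt.Input,
+# e.g. (CIFAR10 shapes assumed):
+# dyn_input = torchtrt.Input(
+#     min_shape=(1, 3, 32, 32),
+#     opt_shape=(args.batch_size, 3, 32, 32),
+#     max_shape=(args.batch_size, 3, 32, 32),
+#     dtype=torch.float32,
+# )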
    +
    +with torch.no_grad():
    +    with export_torch_mode():
    +        # Compile the model with Torch-TensorRT Dynamo backend
    +        input_tensor = images.cuda()
+        # torch.export.export() currently fails here with "RuntimeError: Attempting to use FunctionalTensor on its own. Instead, please use it with a corresponding FunctionalTensorMode()", so we fall back to the internal _export API below.
    +        from torch.export._trace import _export
    +
    +        exp_program = _export(model, (input_tensor,))
    +        if args.quantize_type == "int8":
    +            enabled_precisions = {torch.int8}
    +        elif args.quantize_type == "fp8":
    +            enabled_precisions = {torch.float8_e4m3fn}
    +        trt_model = torchtrt.dynamo.compile(
    +            exp_program,
    +            inputs=[input_tensor],
    +            enabled_precisions=enabled_precisions,
    +            min_block_size=1,
    +            debug=False,
    +        )
+        # You can also use the torch.compile path to compile the model with Torch-TensorRT:
    +        # trt_model = torch.compile(model, backend="tensorrt")
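+        # Hedged sketch of that torch.compile path (not exercised in this example); the
+        # options dict mirrors the dynamo.compile settings used above:
+        # trt_model = torch.compile(
+        #     model,
+        #     backend="tensorrt",
+        #     options={"enabled_precisions": enabled_precisions, "min_block_size": 1},
+        # )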
    +
+        # Run inference with the compiled Torch-TensorRT model over the testing dataset
    +        total = 0
    +        correct = 0
    +        loss = 0.0
    +        class_probs = []
    +        class_preds = []
    +        for data, labels in testing_dataloader:
    +            data, labels = data.cuda(), labels.cuda(non_blocking=True)
    +            out = trt_model(data)
    +            loss += crit(out, labels)
    +            preds = torch.max(out, 1)[1]
    +            class_probs.append([F.softmax(i, dim=0) for i in out])
    +            class_preds.append(preds)
    +            total += labels.size(0)
    +            correct += (preds == labels).sum().item()
    +
    +        test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
    +        test_preds = torch.cat(class_preds)
    +        test_loss = loss / total
    +        test_acc = correct / total
    +        print("Test Loss: {:.5f} Test Acc: {:.2f}%".format(test_loss, 100 * test_acc))
    +
    +
    +

    Total running time of the script: ( 0 minutes 0.000 seconds)

    + +

    Gallery generated by Sphinx-Gallery

    +
    +
    + + +
    + +
    +
    + + + + +
    + + + +
    +

    + © Copyright 2024, NVIDIA Corporation. + +

    +
    + +
    + Built with Sphinx using a theme provided by Read the Docs. +
    + + +
    + +
    +
    + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    +
    +
    +

    Docs

    +

    Access comprehensive developer documentation for PyTorch

    + View Docs +
    + +
    +

    Tutorials

    +

    Get in-depth tutorials for beginners and advanced developers

    + View Tutorials +
    + +
    +

    Resources

    +

    Find development resources and get your questions answered

    + View Resources +
    +
    +
    +
    + + + + + + + + + +
    +
    +
    +
    + + +
    +
    +
    + + +
    + + + + + + + + \ No newline at end of file diff --git a/docs/tutorials/_rendered_examples/index.html b/docs/tutorials/_rendered_examples/index.html index 7f1755974f..2df289be4a 100644 --- a/docs/tutorials/_rendered_examples/index.html +++ b/docs/tutorials/_rendered_examples/index.html @@ -10,7 +10,7 @@ - Torch-TensorRT Tutorials — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Torch-TensorRT Tutorials — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -273,7 +273,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -314,7 +314,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    @@ -472,7 +470,7 @@

    Dynamo / torch.
  • Using Custom Kernels within TensorRT Engines with Torch-TensorRT: Creating a plugin to use a custom kernel inside TensorRT engines

• Refit TensorRT Graph Module with Torch-TensorRT: Refitting a compiled TensorRT Graph Module with updated weights

  • Mutable Torch TensorRT Module: Compile, use, and modify TensorRT Graph Module with MutableTorchTensorRTModule

  • -
  • Deploy Quantized Models using Torch-TensorRT: Compiling a VGG16 model with FP8 and PTQ using torch.compile

  • +
  • vgg16_fp8_ptq: Compiling a VGG16 model with FP8 and PTQ using torch.compile

  • Torch Compile Stable Diffusion

    @@ -495,8 +493,8 @@

    Dynamo / torch.

    Compiling ResNet using the Torch-TensorRT torch.compile Backend

    Compiling ResNet using the Torch-TensorRT torch.compile Backend
    -
    +

    Deploy Quantized Models using Torch-TensorRT

    Deploy Quantized Models using Torch-TensorRT

    Using Custom Kernels within TensorRT Engines with Torch-TensorRT

    diff --git a/docs/tutorials/notebooks.html b/docs/tutorials/notebooks.html index 65120bd6cc..a4edc64f90 100644 --- a/docs/tutorials/notebooks.html +++ b/docs/tutorials/notebooks.html @@ -10,7 +10,7 @@ - Example notebooks — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Example notebooks — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/tutorials/serving_torch_tensorrt_with_triton.html b/docs/tutorials/serving_torch_tensorrt_with_triton.html index 17db46c2c8..3f41b7ce44 100644 --- a/docs/tutorials/serving_torch_tensorrt_with_triton.html +++ b/docs/tutorials/serving_torch_tensorrt_with_triton.html @@ -10,7 +10,7 @@ - Serving a Torch-TensorRT model with Triton — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Serving a Torch-TensorRT model with Triton — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/user_guide/dynamic_shapes.html b/docs/user_guide/dynamic_shapes.html index 7365583ba0..1ce746882e 100644 --- a/docs/user_guide/dynamic_shapes.html +++ b/docs/user_guide/dynamic_shapes.html @@ -10,7 +10,7 @@ - Dynamic shapes with Torch-TensorRT — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Dynamic shapes with Torch-TensorRT — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/user_guide/runtime.html b/docs/user_guide/runtime.html index d84fe43671..bbd9c4efae 100644 --- a/docs/user_guide/runtime.html +++ b/docs/user_guide/runtime.html @@ -10,7 +10,7 @@ - Deploying Torch-TensorRT Programs — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Deploying Torch-TensorRT Programs — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/user_guide/saving_models.html b/docs/user_guide/saving_models.html index d997d544e4..5dde9e4277 100644 --- a/docs/user_guide/saving_models.html +++ b/docs/user_guide/saving_models.html @@ -10,7 +10,7 @@ - Saving models compiled with Torch-TensorRT — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Saving models compiled with Torch-TensorRT — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/user_guide/torch_tensorrt_explained.html b/docs/user_guide/torch_tensorrt_explained.html index fc0de9056c..e93a2c6a02 100644 --- a/docs/user_guide/torch_tensorrt_explained.html +++ b/docs/user_guide/torch_tensorrt_explained.html @@ -10,7 +10,7 @@ - Torch-TensorRT Explained — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + Torch-TensorRT Explained — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation

    diff --git a/docs/user_guide/using_dla.html b/docs/user_guide/using_dla.html index 247a2f21c2..b0e3f18e0e 100644 --- a/docs/user_guide/using_dla.html +++ b/docs/user_guide/using_dla.html @@ -10,7 +10,7 @@ - DLA — Torch-TensorRT v2.5.0.dev0+60ec67b documentation + DLA — Torch-TensorRT v2.5.0.dev0+b3a8cdd documentation @@ -275,7 +275,7 @@
    - v2.5.0.dev0+60ec67b + v2.5.0.dev0+b3a8cdd
    @@ -316,7 +316,6 @@
  • Deploying Torch-TensorRT Programs
  • DLA
  • Torch Compile Advanced Usage
  • -
  • Deploy Quantized Models using Torch-TensorRT
  • Dynamo Frontend

    Python API Documentation