Added rename table for TRT engine, test for output lists

borisfom · borisfom · commit d07a57f5ed8d · 2024-11-01T18:12:24.000-07:00
Signed-off-by: Boris Fomitchev <bfomitchev@nvidia.com>
diff --git a/monai/networks/trt_compiler.py b/monai/networks/trt_compiler.py
@@ -167,7 +167,7 @@ def set_inputs(self, feed_dict, stream):
 
         def try_set_inputs():
             for binding in self.input_names:
-                t = feed_dict[binding]
+                t = feed_dict.get(self.input_table[binding], None)
                 if t is not None:
                     t = t.contiguous()
                     shape = t.shape
@@ -222,6 +222,10 @@ def infer(self, stream, use_cuda_graph=False):
         return self.tensors
 
 
+def make_tensor(d):
+    return d if isinstance(d, torch.Tensor) else torch.tensor(d).cuda()
+
+
 def unroll_input(input_names, input_example):
     # Simulate list/tuple unrolling during ONNX export
     unrolled_input = {}
@@ -230,9 +234,9 @@ def unroll_input(input_names, input_example):
         if val is not None:
             if isinstance(val, list | tuple):
                 for i in range(len(val)):
-                    unrolled_input[f"{name}_{i}"] = val[i]
+                    unrolled_input[f"{name}_{i}"] = make_tensor(val[i])
             else:
-                unrolled_input[name] = val
+                unrolled_input[name] = make_tensor(val)
     return unrolled_input
 
 
@@ -375,8 +379,8 @@ def __init__(
             for i in range(len(self.argspec.defaults)):
                 d = self.argspec.defaults[-i - 1]
                 if d is not None:
-                    d = torch.tensor(d).cuda()
-                self.defaults[self.argspec.args[-i - 1]] = d
+                    d = make_tensor(d)
+                    self.defaults[self.argspec.args[-i - 1]] = d
 
         self.input_names = input_names
         self.old_forward = model.forward
@@ -398,7 +402,16 @@ def _load_engine(self):
         """
         try:
             self.engine = TRTEngine(self.plan_path, self.logger)
-            self.logger.info(f"Engine loaded, inputs:{self.engine.input_names}")
+            # Make sure we have names correct
+            input_table = {}
+            for name in self.engine.input_names:
+                if name.startswith("__") and name not in self.input_names:
+                    orig_name = name[2:]
+                else:
+                    orig_name = name
+                input_table[name] = orig_name
+            self.engine.input_table = input_table
+            self.logger.info(f"Engine loaded, inputs:{self.engine.input_table}")
         except Exception as e:
             self.logger.info(f"Exception while loading the engine:\n{e}")
 
diff --git a/monai/networks/utils.py b/monai/networks/utils.py
@@ -703,7 +703,7 @@ def convert_to_onnx(
             onnx_inputs,
             f=f,
             input_names=input_names,
-            output_names=output_names,
+            output_names=output_names or None,
             dynamic_axes=dynamic_axes,
             opset_version=opset_version,
             do_constant_folding=do_constant_folding,
diff --git a/tests/test_trt_compile.py b/tests/test_trt_compile.py
@@ -13,6 +13,7 @@
 
 import tempfile
 import unittest
+from typing import List
 
 import torch
 from parameterized import parameterized
@@ -32,6 +33,19 @@
 TEST_CASE_2 = ["fp16"]
 
 
+class ListAdd(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x: List[torch.Tensor], y: torch.Tensor, z: torch.Tensor, bs: float = float(0.1)):
+        y1 = y.clone()
+        x1 = x.copy()
+        z1 = z + y
+        for xi in x:
+            y1 = y1 + xi + bs
+        return x1, [y1, z1], y1 + z1
+
+
 @skip_if_windows
 @skip_if_no_cuda
 @skip_if_quick
@@ -68,6 +82,23 @@ def test_handler(self):
             net1.forward(torch.tensor([[0.0, 1.0], [1.0, 2.0]], device="cuda"))
             self.assertIsNotNone(net1._trt_compiler.engine)
 
+    def test_lists(self):
+        model = ListAdd().cuda()
+
+        with torch.no_grad(), tempfile.TemporaryDirectory() as tmpdir:
+            args = {"output_lists": [[-1], [2], []], "export_args": {"dynamo": False, "verbose": True}}
+            x = torch.randn(1, 16).to("cuda")
+            y = torch.randn(1, 16).to("cuda")
+            z = torch.randn(1, 16).to("cuda")
+            input_example = ([x, y, z], y.clone(), z.clone())
+            output_example = model(*input_example)
+            trt_compile(model, f"{tmpdir}/test_lists", args=args)
+            self.assertIsNone(model._trt_compiler.engine)
+            trt_output = model(*input_example)
+            # Check that lazy TRT build succeeded
+            self.assertIsNotNone(model._trt_compiler.engine)
+            torch.testing.assert_close(trt_output, output_example, rtol=0.01, atol=0.01)
+
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     @unittest.skipUnless(has_sam, "Requires SAM installation")
     def test_cell_sam_wrapper_value(self, precision):
@@ -76,11 +107,7 @@ def test_cell_sam_wrapper_value(self, precision):
             model.eval()
             input_example = torch.randn(1, 3, 128, 128).to("cuda")
             output_example = model(input_example)
-            trt_compile(
-                model,
-                f"{tmpdir}/test_cell_sam_wrapper_trt_compile",
-                args={"precision": precision},
-            )
+            trt_compile(model, f"{tmpdir}/test_cell_sam_wrapper_trt_compile", args={"precision": precision})
             self.assertIsNone(model._trt_compiler.engine)
             trt_output = model(input_example)
             # Check that lazy TRT build succeeded