Skip to content

🐛 [Bug] [NGC] L2 Dynamo Test fails on Spark #3858

@apbose

Description

@apbose

Bug:

FAILED models/test_models.py::test_resnet18_torch_exec_ops - AssertionError: ...

TRT 10.13.3.9
PyTorch 2.10.0a0+b558c986e8

Error:

2025-10-11T05:44:58.858100Z 01O =================================== FAILURES ===================================
2025-10-11T05:44:58.858110Z 01O _________________________ test_resnet18_torch_exec_ops _________________________
2025-10-11T05:44:58.858130Z 01O 
2025-10-11T05:44:58.858140Z 01O ir = 'dynamo'
2025-10-11T05:44:58.858150Z 01O 
2025-10-11T05:44:58.858150Z 01O     @unittest.skipIf(
2025-10-11T05:44:58.858160Z 01O         not importlib.util.find_spec("torchvision"), "torchvision not installed"
2025-10-11T05:44:58.858180Z 01O     )
2025-10-11T05:44:58.858180Z 01O     @unittest.skipIf(
2025-10-11T05:44:58.858190Z 01O         platform.system().lower().startswith("windows"),
2025-10-11T05:44:58.858200Z 01O         "Windows cu130 has access violation issue with this test case, skip it for now",
2025-10-11T05:44:58.858220Z 01O     )
2025-10-11T05:44:58.858220Z 01O     def test_resnet18_torch_exec_ops(ir):
2025-10-11T05:44:58.858230Z 01O         model = models.resnet18(pretrained=True).eval().to("cuda")
2025-10-11T05:44:58.858240Z 01O         input = torch.randn((1, 3, 224, 224)).to("cuda")
2025-10-11T05:44:58.858250Z 01O     
2025-10-11T05:44:58.858260Z 01O         compile_spec = {
2025-10-11T05:44:58.858260Z 01O             "inputs": [
2025-10-11T05:44:58.858270Z 01O                 torchtrt.Input(
2025-10-11T05:44:58.858280Z 01O                     min_shape=(1, 3, 224, 224),
2025-10-11T05:44:58.858290Z 01O                     opt_shape=(8, 3, 224, 224),
2025-10-11T05:44:58.858300Z 01O                     max_shape=(16, 3, 224, 224),
2025-10-11T05:44:58.858300Z 01O                     dtype=torch.float32,
2025-10-11T05:44:58.858310Z 01O                 )
2025-10-11T05:44:58.858320Z 01O             ],
2025-10-11T05:44:58.858330Z 01O             "ir": ir,
2025-10-11T05:44:58.858330Z 01O             "enabled_precisions": {torch.float32, torch.float16},
2025-10-11T05:44:58.858340Z 01O             "min_block_size": 1,
2025-10-11T05:44:58.858350Z 01O             "output_format": "exported_program",
2025-10-11T05:44:58.858360Z 01O             "cache_built_engines": True,
2025-10-11T05:44:58.858370Z 01O             "reuse_cached_engines": True,
2025-10-11T05:44:58.858380Z 01O             "torch_executed_ops": {torch.ops.aten.matmul, "torch.ops.aten.add"},
2025-10-11T05:44:58.858390Z 01O         }
2025-10-11T05:44:58.858390Z 01O     
2025-10-11T05:44:58.858400Z 01O         trt_mod = torchtrt.compile(model, **compile_spec)
2025-10-11T05:44:58.858410Z 01O         cos_sim = cosine_similarity(model(input), trt_mod(input))
2025-10-11T05:44:58.858420Z 01O >       assertions.assertTrue(
2025-10-11T05:44:58.858430Z 01O             cos_sim > COSINE_THRESHOLD,
2025-10-11T05:44:58.858430Z 01O             msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
2025-10-11T05:44:58.858450Z 01O         )
2025-10-11T05:44:58.858450Z 01O 
2025-10-11T05:44:58.858460Z 01O models/test_models.py:133: 
2025-10-11T05:44:58.858470Z 01O _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
2025-10-11T05:44:58.858480Z 01O 
2025-10-11T05:44:58.858480Z 01O self = <unittest.case.TestCase testMethod=runTest>, expr = False
2025-10-11T05:44:58.858490Z 01O msg = "False is not true : Resnet18 TRT outputs don't match with the original model. Cosine sim score: 0.32403483986854553 Threshold: 0.99"
2025-10-11T05:44:58.858510Z 01O 
2025-10-11T05:44:58.858510Z 01O     def assertTrue(self, expr, msg=None):
2025-10-11T05:44:58.858520Z 01O         """Check that the expression is true."""
2025-10-11T05:44:58.858530Z 01O         if not expr:
2025-10-11T05:44:58.858540Z 01O             msg = self._formatMessage(msg, "%s is not true" % safe_repr(expr))
2025-10-11T05:44:58.858550Z 01O >           raise self.failureException(msg)
2025-10-11T05:44:58.858560Z 01O E           AssertionError: False is not true : Resnet18 TRT outputs don't match with the original model. Cosine sim score: 0.32403483986854553 Threshold: 0.99
2025-10-11T05:44:58.858970Z 01O 
2025-10-11T05:44:58.858980Z 01O /usr/lib/python3.12/unittest/case.py:727: AssertionError
2025-10-11T05:44:58.858990Z 01O ------------------------------ Captured log call -------------------------------

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions