From 212019b1122d2c8f9ecaa37988443de83018aedf Mon Sep 17 00:00:00 2001
From: gs-olive <113141689+gs-olive@users.noreply.github.com>
Date: Tue, 14 Nov 2023 21:19:42 -0800
Subject: [PATCH] feat: Add optional filepath to save

- Add detailed layer information for excluded ops
---
 py/torch_tensorrt/dynamo/_DryRunTracker.py | 56 ++++++++++++++++++++--
 py/torch_tensorrt/dynamo/_compiler.py      |  6 +++
 py/torch_tensorrt/dynamo/_settings.py      |  9 ++--
 3 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/py/torch_tensorrt/dynamo/_DryRunTracker.py b/py/torch_tensorrt/dynamo/_DryRunTracker.py
index 85cc23165a..031fce2e73 100644
--- a/py/torch_tensorrt/dynamo/_DryRunTracker.py
+++ b/py/torch_tensorrt/dynamo/_DryRunTracker.py
@@ -1,9 +1,14 @@
 import logging
 import math
+import operator
+import os
 from dataclasses import dataclass, field
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Union
 
+import torch
 from torch_tensorrt.dynamo._settings import CompilationSettings
+from torch_tensorrt.dynamo.conversion._ConverterRegistry import ConverterRegistry
+from torch_tensorrt.dynamo.conversion.converter_utils import get_node_name
 
 logger = logging.getLogger(__name__)
 
@@ -44,6 +49,7 @@ class DryRunTracker:
         tensorrt_graph_count (int): Number of TensorRT engines to be generated
         compilation_settings (CompilationSettings): User Compilation Settings
         unsupported_ops (Dict[str, int]): Set of operators not supported in TRT
+        to_run_in_torch (List[str]): List of nodes to run in Torch
     """
 
     total_ops_in_graph: int = 0
@@ -58,9 +64,12 @@ class DryRunTracker:
         default_factory=CompilationSettings
     )
     unsupported_ops: Dict[str, int] = field(default_factory=dict)
+    to_run_in_torch: List[str] = field(default_factory=list)
 
 
-def dryrun_stats_display(dryrun_tracker: DryRunTracker, dryrun_enabled: bool) -> None:
+def dryrun_stats_display(
+    dryrun_tracker: DryRunTracker, dryrun_enabled: Union[bool, str]
+) -> None:
     """Displays statistics about the dryrun either to debug logs or stdout"""
     formatted_stats = "\n"
 
@@ -71,7 +80,19 @@ def dryrun_stats_display(dryrun_tracker: DryRunTracker, dryrun_enabled: bool) -> None:
         f"of which {dryrun_tracker.supported_ops_in_graph} operators are supported, "
         f"{round(dryrun_tracker.supported_ops_in_graph*100/dryrun_tracker.total_ops_in_graph, 2)}% coverage\n\n"
     )
-    formatted_stats += f"The following ops are currently unsupported and set to run in Torch: {dryrun_tracker.unsupported_ops}\n\n"
+    if dryrun_tracker.unsupported_ops:
+        parsed_ops = "\n".join(
+            [f"{str(k)}: {str(v)}" for k, v in dryrun_tracker.unsupported_ops.items()]
+        )
+        formatted_stats += f"The following ops are currently unsupported or excluded from conversion, and are listed with their op-count in the graph:\n {parsed_ops}\n\n"
+
+    if dryrun_tracker.to_run_in_torch:
+        formatted_nodes = "\n".join(dryrun_tracker.to_run_in_torch)
+        formatted_stats += (
+            f"The following nodes are currently set to run in Torch:\n{formatted_nodes}\n"
+            "Note: Some of the above nodes may be supported, but were not included in a TRT graph by the partitioner\n\n"
+        )
+
     formatted_stats += f"Compiled with: {dryrun_tracker.compilation_settings}\n\n"
 
     assert len(dryrun_tracker.per_subgraph_data) == dryrun_tracker.tensorrt_graph_count
@@ -184,8 +205,17 @@ def dryrun_stats_display(dryrun_tracker: DryRunTracker, dryrun_enabled: bool) -> None:
     )
 
     # If user specified "dryrun=True", print to stdout, else debug
+    # If user specified a filepath, save the output to the path as well
     if dryrun_enabled:
         print(formatted_stats)
+        if isinstance(dryrun_enabled, str):
+            if os.path.exists(dryrun_enabled):
+                logger.warning(
+                    f"File already exists at path {dryrun_enabled}, not saving dryrun output"
+                )
+            else:
+                with open(dryrun_enabled, "w+") as f:
+                    f.write(formatted_stats)
     else:
         logger.debug(formatted_stats)
 
@@ -225,3 +255,23 @@ def input_formatter_helper(shapes: Any, dtypes: Any) -> str:
         )
 
     return input_formatter_helper(shapes, dtypes)[:-2]
+
+
+def parse_non_trt_nodes(graph_module: torch.fx.GraphModule) -> List[str]:
+    """Parses call_function and call_method nodes from a GraphModule
+    Excludes getitem nodes
+
+    Returns a list of string representations of the nodes
+    """
+    to_run_in_torch = []
+    for node in graph_module.graph.nodes:
+        # getitem nodes are excluded since they are Tensor-collection ops
+        if (
+            node.op in ("call_function", "call_method")
+            and node.target != operator.getitem
+        ):
+            to_run_in_torch.append(
+                f"Node: {ConverterRegistry.qualified_name_or_str(node.target)}, "
+                f"with layer location: {get_node_name(node)}"
+            )
+    return to_run_in_torch
diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py
index d382f26db6..23e32e2b65 100644
--- a/py/torch_tensorrt/dynamo/_compiler.py
+++ b/py/torch_tensorrt/dynamo/_compiler.py
@@ -41,6 +41,7 @@
     DryRunTracker,
     PerSubgraphData,
     dryrun_stats_display,
+    parse_non_trt_nodes,
 )
 from torch_tensorrt.dynamo.conversion import (
     CompilationSettings,
@@ -319,6 +320,10 @@ def compile_module(
 
     dryrun_tracker.unsupported_ops = supported_ops.unsupported_operators
 
+    # The global partitioner leaves non-TRT nodes as-is
+    if not settings.use_fast_partitioner:
+        dryrun_tracker.to_run_in_torch.extend(parse_non_trt_nodes(partitioned_module))
+
     # Store TRT replicas of Torch subgraphs
     trt_modules = {}
     # Iterate over all components that can be accelerated
@@ -327,6 +332,7 @@ def compile_module(
         submodule = getattr(partitioned_module, name)
         # Criteria for a module to be convertible to TRT
         if settings.use_fast_partitioner and "_run_on_acc" not in name:
+            dryrun_tracker.to_run_in_torch.extend(parse_non_trt_nodes(submodule))
             continue
 
         subgraph_data = PerSubgraphData()
diff --git a/py/torch_tensorrt/dynamo/_settings.py b/py/torch_tensorrt/dynamo/_settings.py
index 00c3d95a0e..60990bda99 100644
--- a/py/torch_tensorrt/dynamo/_settings.py
+++ b/py/torch_tensorrt/dynamo/_settings.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import Optional, Set
+from typing import Optional, Set, Union
 
 import torch
 from tensorrt import EngineCapability
@@ -64,8 +64,9 @@ class CompilationSettings:
         dla_sram_size (int): Fast software managed RAM used by DLA to communicate within a layer.
         dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations
        dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution
-        dryrun (bool): Toggle "Dryrun" mode, which runs everything through partitioning, short of conversion to
-            TRT Engines. Prints detailed logs of the graph structure and nature of partitioning
+        dryrun (Union[bool, str]): Toggle "Dryrun" mode, which runs everything through partitioning, short of conversion to
+            TRT Engines. Prints detailed logs of the graph structure and nature of partitioning. Optionally saves the
+            output to a file if a string path is specified
     """
 
     precision: torch.dtype = PRECISION
@@ -91,4 +92,4 @@ class CompilationSettings:
     dla_sram_size: int = DLA_SRAM_SIZE
     dla_local_dram_size: int = DLA_LOCAL_DRAM_SIZE
     dla_global_dram_size: int = DLA_GLOBAL_DRAM_SIZE
-    dryrun: bool = DRYRUN
+    dryrun: Union[bool, str] = DRYRUN
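
A minimal usage sketch of the new dryrun behavior, assuming the torch_tensorrt.compile entrypoint with ir="dynamo" forwards the dryrun kwarg through to CompilationSettings; the toy model and the output path are illustrative, not part of the patch:

import torch
import torch_tensorrt

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU()).cuda().eval()
inputs = [torch.randn(1, 8).cuda()]

# dryrun=True: run partitioning end-to-end and print the stats report to
# stdout, skipping TensorRT engine construction
torch_tensorrt.compile(model, ir="dynamo", inputs=inputs, dryrun=True)

# dryrun="<path>": also write the same report to a new file at that path;
# per this patch, an existing file is left untouched and a warning is logged
torch_tensorrt.compile(model, ir="dynamo", inputs=inputs, dryrun="dryrun_stats.txt")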
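
For the parse_non_trt_nodes helper, the traversal it performs can be sketched standalone on any torch.fx.GraphModule. This sketch substitutes str(node.target) and node.name for the internal ConverterRegistry.qualified_name_or_str and get_node_name utilities, so the exact strings differ from the patch's real output:

import operator

import torch


def list_non_getitem_calls(gm: torch.fx.GraphModule) -> list:
    # Keep call_function/call_method nodes and skip operator.getitem,
    # mirroring the filtering in parse_non_trt_nodes above
    return [
        f"Node: {node.target}, with layer location: {node.name}"
        for node in gm.graph.nodes
        if node.op in ("call_function", "call_method")
        and node.target != operator.getitem
    ]


gm = torch.fx.symbolic_trace(lambda x: torch.relu(x) + 1)
print("\n".join(list_non_getitem_calls(gm)))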