From de767507e5ce956ebedd5ee5e991aa88c017b68f Mon Sep 17 00:00:00 2001
From: ganler <jaway.liu@gmail.com>
Date: Tue, 9 Jan 2024 07:35:34 -0600
Subject: [PATCH] feat(cli): bug report synthesis

---
 doc/cli.md                            | 20 +++++++++
 nnsmith/cli/report_syn.py             | 61 +++++++++++++++++++++++++++
 nnsmith/logging.py                    |  1 +
 nnsmith/materialize/torch/__init__.py |  2 +-
 setup.cfg                             |  1 +
 5 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 nnsmith/cli/report_syn.py

diff --git a/doc/cli.md b/doc/cli.md
index 98208aaf..5b7503ac 100644
--- a/doc/cli.md
+++ b/doc/cli.md
@@ -121,6 +121,26 @@ yes | nnsmith.model_gen model.type=torch mgen.method=symbolic-cinit \
                                          debug.viz=true
 ```
 
+## Synthesize bug reports
+
+`nnsmith.report_syn` synthesizes a bug report from a model (read from the filesystem) and a user-provided backend target.
+It prints a self-contained Python script:
+
+1. In most cases, the script reproduces the bug directly.
+2. Otherwise, it serves as a good starting point that developers can adapt slightly to reproduce the bug.
+
+> **Note**
+>
+> This is an experimental feature and only works for PyTorch models.
+
+> **Warning**
+>
+> `nnsmith.report_syn` is not guaranteed to reproduce the bug. For strict bug reproduction, please use `nnsmith.model_exec` instead.
+
+```shell
+nnsmith.report_syn backend.type="pt2 backend@inductor" model.type=torch model.path=nnsmith_output/model.pth
+```
+
 ## Misc
 
 TensorFlow logging can be very noisy. Use `TF_CPP_MIN_LOG_LEVEL=3` as environmental variable to depress that.
diff --git a/nnsmith/cli/report_syn.py b/nnsmith/cli/report_syn.py
new file mode 100644
index 00000000..e0ae9fc1
--- /dev/null
+++ b/nnsmith/cli/report_syn.py
@@ -0,0 +1,61 @@
+import os
+
+import hydra
+from omegaconf import DictConfig, ListConfig
+
+from nnsmith.backends import BackendFactory
+from nnsmith.logging import RENDER_LOG
+from nnsmith.materialize import Model, Render
+
+
+@hydra.main(version_base=None, config_path="../config", config_name="main")
+def main(cfg: DictConfig):
+    """Print a self-contained reproduction script for every model in `cfg.model.path`."""
+    RENDER_LOG.warning(
+        "The duty of `nnsmith.report_syn` is to produce a BASIC but executable Python script. It may not reproduce the original bug as the report may not use the original seed, input data, and output oracles. If you want to more strictly reproduce the bug, please use `nnsmith.model_exec`."
+    )
+
+    cmp_cfg = cfg["cmp"]
+    model_cfg = cfg["model"]
+    ModelType = Model.init(model_cfg["type"], cfg["backend"]["target"])
+
+    if isinstance(model_cfg["path"], ListConfig):
+        model_paths = model_cfg["path"]
+    else:
+        model_paths = [model_cfg["path"]]
+
+    for model_path in model_paths:
+        model = ModelType.load(model_path)
+
+        oracle_path = None
+        # "auto": look for `oracle.pkl` in the same directory as the model file.
+        if "auto" == cmp_cfg["oracle"]:
+            model_dir = os.path.dirname(os.path.normpath(model_path))
+            oracle_path = os.path.join(model_dir, "oracle.pkl")
+        elif cmp_cfg["oracle"] is not None:
+            oracle_path = cmp_cfg["oracle"]
+
+        # Guard against None first: `os.path.exists(None)` raises TypeError.
+        if oracle_path is not None and not os.path.exists(oracle_path):
+            oracle_path = None
+
+        this_fac = BackendFactory.init(
+            cfg["backend"]["type"],
+            target=cfg["backend"]["target"],
+            optmax=cfg["backend"]["optmax"],
+            parse_name=True,
+        )
+
+        render = Render()
+        render.emit_model(model)
+        render.emit_input(model, oracle_path)
+        render.emit_backend(this_fac)
+
+        print("#", "-" * 20)
+        print(f"# {model_path}")
+        print(render.render())
+        print("#", "-" * 20)
+
+
+if __name__ == "__main__":
+    main()  # No arguments here: the `hydra.main` decorator supplies `cfg`.
diff --git a/nnsmith/logging.py b/nnsmith/logging.py
index f434b843..03ecd58b 100644
--- a/nnsmith/logging.py
+++ b/nnsmith/logging.py
@@ -6,6 +6,7 @@
 SMT_LOG = logging.getLogger("smt")
 EXEC_LOG = logging.getLogger("exec")
 DTEST_LOG = logging.getLogger("dtest")
+RENDER_LOG = logging.getLogger("render")
 CORE_LOG = logging.getLogger("core")
 
 TF_LOG = logging.getLogger("gen|tf")
diff --git a/nnsmith/materialize/torch/__init__.py b/nnsmith/materialize/torch/__init__.py
index 117f054c..7e3234fe 100644
--- a/nnsmith/materialize/torch/__init__.py
+++ b/nnsmith/materialize/torch/__init__.py
@@ -190,7 +190,7 @@ def import_libs(self) -> List[str]:
 
     def emit_input(self, inp_name: str, path: Optional[PathLike] = None):
         if path is not None:  # Assume NumPy tensors as inputs
-            return f"{inp_name} = [v for _, v in pickle.load(open('{path}', 'rb'))['input']]"
+            return f"{inp_name} = [v for _, v in pickle.load(open('{path}', 'rb'))['input'].items()]"
 
         # Path is None. Generate inputs from scratch.
         tensor_text = []
diff --git a/setup.cfg b/setup.cfg
index 07772e6c..ee0e3a46 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -42,3 +42,4 @@ console_scripts =
     nnsmith.model_exec = nnsmith.cli.model_exec:main
     nnsmith.dtype_test = nnsmith.cli.dtype_test:main
     nnsmith.fuzz = nnsmith.cli.fuzz:main
+    nnsmith.report_syn = nnsmith.cli.report_syn:main