Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions tools/expandable/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# ----------------------------------------------------------------------------
VISUALIZATION = True
TESTME = False
FUSION = False

# Static kernel data (name: (sort_id, total_iterations, static_execution_time))
KERNEL_DATA = {
Expand Down Expand Up @@ -96,6 +97,8 @@ def parse_arguments():
help='Timeout setting for operations')
parser.add_argument('--visualize', type=str_to_bool, default=VISUALIZATION,
help='Generate visualization figures [y/n]')
parser.add_argument('--fusion', type=str_to_bool, default=FUSION,
help='Default fusion strategy for kernels [y/n]')

return parser.parse_args()

Expand All @@ -106,15 +109,17 @@ def load_configuration():
2. Default values (lowest priority)
"""
# Update global configuration with command line arguments
global VISUALIZATION, TESTME
global VISUALIZATION, TESTME, FUSION
# Parse command line arguments
args = parse_arguments()
VISUALIZATION = args.visualize
TESTME = args.test
FUSION = args.fusion
scheduler.init_args(args)
print(f"Test in CI/CD: {args.test}")
print(f"Timeout: {args.time_out_set}")
print(f"Visualization: {args.visualize}")
print(f"FUSION: {args.fusion}")


# ========== Task Loading Function ==========
Expand Down Expand Up @@ -287,13 +292,16 @@ def main():
print("[Step 1] Loading tasks and Scheduling tasks on 4x4 Multi-CGRA...")
if TESTME:
run_simulation_for_case(1)
# run_simulation_for_case(task_id = 6, num_task_cgras=4, file_name="2x2", load_from_file=True) # 2x2
run_simulation_for_case(task_id = 6, num_task_cgras=4, file_name="2x2", load_from_file=True) # 2x2
else:
for task_case_id in TASK_CONFIGS:
run_simulation_for_case(task_case_id)

# 4. Execute scheduling
print("[Step 2] Loading tasks and Scheduling tasks on 2x2, 3x3, 5x5 Multi-CGRA...")
global FUSION
FUSION = True
scheduler.update_args(FUSION)
run_simulation_for_case(task_id = 6, num_task_cgras=4, file_name="2x2", load_from_file=True) # 2x2
run_simulation_for_case(task_id = 6, num_task_cgras=9, file_name="3x3", load_from_file=True) # 3x3
run_simulation_for_case(task_id = 6, num_task_cgras=16, file_name="4x4", load_from_file=True) # 4x4
Expand Down
4 changes: 2 additions & 2 deletions tools/expandable/param.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
0
],
"doCGRAMapping": true,
"row": 12,
"column": 12,
"row": 4,
"column": 4,
"precisionAware": false,
"fusionStrategy": [
"default_heterogeneous"
Expand Down
26 changes: 16 additions & 10 deletions tools/expandable/util/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,20 @@
JSON_NAME = "./param.json"
TIME_OUT_SET = 180
KERNEL_DIRECTORY = "../../test/kernels"

KERNEL_FUSION = False

def init_args(args):
    """Initialize scheduler configuration from parsed command-line arguments.

    Copies the JSON config path, kernel directory, operation timeout, and
    kernel-fusion flag from *args* (an argparse.Namespace) into the
    module-level configuration globals used by the scheduler.
    """
    global JSON_NAME, TIME_OUT_SET, KERNEL_DIRECTORY, KERNEL_FUSION
    JSON_NAME = args.json_name
    KERNEL_DIRECTORY = args.kernel_directory
    TIME_OUT_SET = args.time_out_set
    KERNEL_FUSION = args.fusion

def update_args(args):
    """Override the module-level kernel-fusion flag after initialization.

    Unlike init_args, *args* here is the boolean fusion setting itself,
    not an argparse namespace (callers pass e.g. ``update_args(True)``).
    """
    global KERNEL_FUSION
    KERNEL_FUSION = args

# ----------------------------------------------------------------------------
# class defination /
Expand Down Expand Up @@ -73,10 +79,7 @@ def __lt__(self, other):
def load_data(self):
prefix = './tmp/t_'
csv_name = f'{prefix}{self.kernel_name}_{self.rows}x{self.columns}_unroll{self.unroll_factor}_vector{self.vector_factor}.csv'
if os.path.exists(csv_name):
self.read_ii(csv_name)
else:
self.get_ii(csv_name)
self.get_ii(csv_name)

self.is_valid = bool(self.base_ii)
# print(f"Kernel {self.kernel_name} loaded with arrive_period={self.arrive_period}")
Expand Down Expand Up @@ -157,7 +160,6 @@ def map_kernel(self):
gen_map_proc.stdout.flush()
for line in iter(gen_map_proc.stdout.readline, b''):
output_line = line.decode("ISO-8859-1")
#print(output_line)
if "DFG node count: " in output_line:
dataS.append(int(output_line.split("DFG node count: ")[1].split(";")[0]))
dataS.append(int(output_line.split("DFG edge count: ")[1].split(";")[0]))
Expand All @@ -183,7 +185,6 @@ def map_kernel(self):

self.df.loc[len(self.df.index)] = dataS


def map_kernel_skip(self):
"""
This is a func gain DFG information only without mapping.
Expand Down Expand Up @@ -228,7 +229,12 @@ def get_ii(self, csv_name):
"""
# print("Generating", csv_name)
target_kernel = self.comp_kernel()

target_fusion_strategy = []
target_fused_kernel = ['gemm.c', 'fft.c', 'spmv.c']
if KERNEL_FUSION:
target_fusion_strategy = ["default_heterogeneous"]
else:
target_fusion_strategy = []
neura_json = {
"kernel": target_kernel,
"targetFunction": False,
Expand All @@ -238,7 +244,7 @@ def get_ii(self, csv_name):
"row": self.rows,
"column": self.columns,
"precisionAware": False,
"fusionStrategy": ["default_heterogeneous"],
"fusionStrategy": target_fusion_strategy,
"isTrimmedDemo": True,
"heuristicMapping": True,
"parameterizableCGRA": False,
Expand Down
37 changes: 13 additions & 24 deletions tools/expandable/util/visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def load_scalability_data(self, task_case: str, csv_name: str, execution_baselin

# Cache the data
cache_key = f"{task_case}_{csv_name}"
self.scalability_cache[cache_key] = df['Total_Execution_duration'] / execution_baseline
self.scalability_cache[cache_key] = df['Total_Execution_duration'].head(9) / execution_baseline
self.latency_cache[cache_key] = df['Overall_Case_Latency'] / latency_baseline
self.utilization_cache[cache_key] = df['CGRA_Utilization']
return self.scalability_cache[cache_key]
Expand Down Expand Up @@ -231,7 +231,6 @@ def genFig9(self, fig_path: str):
cache_key = f"{case}_{group}" # Adjust based on your actual naming convention
execution_series = self.execution_cache.get(cache_key)
utilization_series = self.utilization_cache.get(cache_key)

# Bar chart data - Resource utilization
if execution_series is not None:
if hasattr(execution_series, 'to_dict'):
Expand Down Expand Up @@ -480,6 +479,8 @@ def genFig11(self, fig_path: str):
scalability_series = self.scalability_cache.get(cache_key)
latency_series = self.latency_cache.get(cache_key)
throughput_speedup = [0] * len(scalability_series)
tmp = [0] * len(scalability_series)
throughput_speedup_percentage = [0] * len(scalability_series)
for i in range(len(scalability_series)):
throughput_speedup[i] = (1 / (scalability_series[i] * latency_series[i] * 100))
throughput_baseline = sum(throughput_speedup)
Expand All @@ -493,14 +494,19 @@ def genFig11(self, fig_path: str):
utilization_series is None):
continue
for i in range(len(scalability_series)):
tmp[i] = scalability_series[i] * latency_series[i]
if scalability_series[i] * latency_series[i] == 0:
tmp = 0
throughput_tmp = 0
else:
tmp = (1 / (scalability_series[i] * latency_series[i] * 100))
throughput_speedup[i] = tmp / throughput_baseline
throughput_tmp = (1 / (scalability_series[i] * latency_series[i] * 100))
throughput_speedup[i] = throughput_tmp / throughput_baseline
sum_tmp = sum(tmp)
sum_throughput = sum(throughput_speedup)
for i in range(len(scalability_series)):
throughput_speedup_percentage[i] = (tmp[i] / sum_tmp) * sum_throughput
# Bar chart data
for i, kernel in enumerate(self.KERNEL_NAMES):
bar_data[kernel].append(throughput_speedup[i])
bar_data[kernel].append(throughput_speedup_percentage[i])

# Line chart data
if utilization_series is not None:
Expand Down Expand Up @@ -632,21 +638,4 @@ def genFig11(self, fig_path: str):
plt.tight_layout()
# plt.legend()
plt.savefig(fig_path)
print(f"Generated fig {fig_path}")

if __name__ == '__main__':
KERNEL_DATA = {
"fir.cpp": (7, 2048, 4096),
"latnrm.c": (8, 1280, 2560),
"fft.c": (2, 112640, 450560),
"dtw.cpp": (4, 16384, 49152),
"spmv.c": (3, 65536, 262144),
"conv.c": (1, 655360, 1310720),
"mvt.c": (5, 16384, 49152),
"gemm.c": (0, 2097152, 8388608),
"relu+histogram.c": (6, 262144, 2097152)
}
genFigs = SimulationDataAnalyzer(kernel_data=KERNEL_DATA)
genFigs.genFig9("./fig/Fig9Test.png")
#genFigs.genFig10("./fig/Fig10.png")
genFigs.genFig11("./fig/Fig11Test.png")
print(f"Generated fig {fig_path}")