From 651c47642892066383ecc1ec6635744013e0119d Mon Sep 17 00:00:00 2001 From: Ayaka Yorihiro <36107281+ayakayorihiro@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:29:56 -0700 Subject: [PATCH] [Profiling] Visualizations: Cycle flame graph, frequency flame graph, timeline view (#2284) The profiling script (`get-profile-counts-info.sh`) now produces visualizations! - Cycle flame graph: `flame.svg` can be viewed using your favorite web browser. For any group, it shows the "call stack" and the number of cycles it was active for. - Frequency flame graph: `frequency-flame.svg` can also be viewed using your favorite web browser. For any group, it shows the "call stack" and the number of times the group was active. - Timeline view: `timeline.json` is a JSON file in the [Google Trace Event Format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview). You can visualize the timeline using [Perfetto UI](https://ui.perfetto.dev/). The biggest caveat to the profiler right now is that the visualizations only work for **non-optimized, sequential (i.e. no `par`) Calyx programs**. I will work on expanding the profiler's capabilities in the near future (after finding some example use cases where knowing performance information was helpful). 
--- .gitignore | 3 + tools/profiler/convert-dump.py | 43 --- tools/profiler/create-visuals.py | 299 ++++++++++++++++ tools/profiler/get-profile-counts-info.sh | 62 +++- .../linear-algebra-bicg/flame.folded | 24 ++ .../linear-algebra-bicg/line-by-line.folded | 24 ++ .../linear-algebra-bicg/original-flame.folded | 24 ++ .../handmade-flame-graphs/ntt-32/flame.folded | 337 ++++++++++++++++++ .../ntt-32/new-flame.folded | 337 ++++++++++++++++++ .../ntt-32/original-flame.folded | 337 ++++++++++++++++++ tools/profiler/parse-vcd.py | 71 ++-- tools/profiler/run-up-to-tdcc.sh | 4 +- 12 files changed, 1494 insertions(+), 71 deletions(-) delete mode 100644 tools/profiler/convert-dump.py create mode 100644 tools/profiler/create-visuals.py create mode 100644 tools/profiler/handmade-flame-graphs/linear-algebra-bicg/flame.folded create mode 100644 tools/profiler/handmade-flame-graphs/linear-algebra-bicg/line-by-line.folded create mode 100644 tools/profiler/handmade-flame-graphs/linear-algebra-bicg/original-flame.folded create mode 100644 tools/profiler/handmade-flame-graphs/ntt-32/flame.folded create mode 100644 tools/profiler/handmade-flame-graphs/ntt-32/new-flame.folded create mode 100644 tools/profiler/handmade-flame-graphs/ntt-32/original-flame.folded diff --git a/.gitignore b/.gitignore index 1f773c74e..76b8b9a9b 100644 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,9 @@ tools/btor2/btor2i/build/ # profiling ignore tools/profiler/data +tools/profiler/meta-logs +tools/profiler/fg-tmp +tools/profiler/handmade-flame-graphs/*/*.svg temp/ diff --git a/tools/profiler/convert-dump.py b/tools/profiler/convert-dump.py deleted file mode 100644 index 04a88e7b8..000000000 --- a/tools/profiler/convert-dump.py +++ /dev/null @@ -1,43 +0,0 @@ -# Takes in a dump file created by parse-vcd.py and creates a JSON file in the Google Trace Event Format -import json -import sys - -# Starting with the JSON array format for now... 
-# example -# [ {"name": "Asub", "cat": "PERF", "ph": "B", "pid": 22630, "tid": 22630, "ts": 829}, -# {"name": "Asub", "cat": "PERF", "ph": "E", "pid": 22630, "tid": 22630, "ts": 833} ] - -def main(profiler_dump_file, out_file): - profiled_info = json.load(open(profiler_dump_file, "r")) - cat = "GT" # Ground truth category (will overwrite if it's FSM) - events = [] - id_acc = 1 - ts_multiplier = 100 # some arbitrary number to multiply by so that it's easier to see in the viewer - for group_info in profiled_info: - name = group_info["name"].split("TOP.toplevel.", 1)[1] - if group_info["fsm_name"] is not None: - cat = "FSM" - name = "[FSM] " + name - for segment in group_info["closed_segments"]: - # beginning of segment - begin_time = segment["start"] * ts_multiplier - events.append({"name": name, "cat": cat, "ph": "B", "pid" : id_acc, "tid": id_acc, "ts" : begin_time}) - # end of segment - end_time = segment["end"] * ts_multiplier - events.append({"name": name, "cat": cat, "ph": "E", "pid": id_acc, "tid": id_acc, "ts": end_time}) - id_acc += 1 - with open(out_file, "w") as out: - json.dump(events, out, indent=4) - -if __name__ == "__main__": - if len(sys.argv) > 2: - profiler_dump_json = sys.argv[1] - visuals_json = sys.argv[2] - main(profiler_dump_json, visuals_json) - else: - args_desc = [ - "PROFILER_JSON", - "VISUALS_JSON" - ] - print(f"Usage: {sys.argv[0]} {' '.join(args_desc)}") - sys.exit(-1) diff --git a/tools/profiler/create-visuals.py b/tools/profiler/create-visuals.py new file mode 100644 index 000000000..aa7339ac5 --- /dev/null +++ b/tools/profiler/create-visuals.py @@ -0,0 +1,299 @@ +# Takes in a dump file created by parse-vcd.py and creates a JSON file in the Google Trace Event Format +import json +import sys + +class FlameInfo: + def __init__(self, name, backptr, cycles, is_fsm): + self.name = name + self.backptr = backptr + self.cycles = cycles + self.is_fsm = is_fsm + + def make_folded_log_entry(self): + if self.backptr is not None: + return 
f'{self.backptr};{self.name} {self.cycles}' + else: + return f'{self.name} {self.cycles}' + +# Computes which groups have a FSM-recorded group +def get_fsm_groups(profiled_info): + fsm_groups = set() + all_groups = set() + for group_info in profiled_info: + if group_info["name"] == "TOTAL" or group_info["component"] is None: + continue + all_groups.add(group_info["name"]) + if group_info["fsm_name"] is not None: + fsm_groups.add(group_info["name"]) + return fsm_groups, all_groups + +def create_timeline_map(profiled_info, fsm_groups, all_groups): + summary = list(filter(lambda x : x["name"] == "TOTAL", profiled_info))[0] + total_cycles = summary["total_cycles"] + only_gt_groups = all_groups.difference(fsm_groups) + timeline_map = {i : {} for i in range(total_cycles)} + fsm_timeline_map = {i : {} for i in range(total_cycles)} + group_to_gt_segments = {} # we need segment info for frequency checking + for group_info in profiled_info: + group_name = group_info["name"] + if group_name == "TOTAL" or group_info["component"] is None: # only care about actual groups + continue + for segment in group_info["closed_segments"]: + if group_info["fsm_name"] is None: + if group_name not in group_to_gt_segments: + group_to_gt_segments[group_name] = {} # segment start cycle to segment end cycle + group_to_gt_segments[group_name][segment["start"]] = segment["end"] + for i in range(segment["start"], segment["end"]): # really janky, I wonder if there's a better way to do this? + if group_info["fsm_name"] is not None: # FSM version + fsm_timeline_map[i][group_info["component"]] = group_name + elif group_name in only_gt_groups: # A group that isn't managed by an FSM. In which case it has to be in both FSM and GT + fsm_timeline_map[i][group_info["component"]] = group_name + timeline_map[i][group_info["component"]] = group_name + else: # The ground truth info about a group managed by an FSM. 
+ timeline_map[i][group_info["component"]] = group_name + + return timeline_map, fsm_timeline_map, group_to_gt_segments + +def create_frequency_flame_graph(main_component, cells_map, timeline, group_to_gt_segments, frequency_flame_out): + main_shortname = main_component.split("TOP.toplevel.")[1] + frequency_stacks = {} + i = 0 + while i < len(timeline): + if len(timeline[i]) == 0: + i += 1 + continue + group_component = sorted(timeline[i], key=lambda k : timeline[i][k].count("."), reverse=True)[0] + group_full_name = timeline[i][group_component] + stack = "" + group_name = group_full_name.split(".")[-1] + # FIXME: code clone + if group_component == main_shortname: + stack = main_component + ";" + group_name + else: + after_main = group_full_name.split(f"{main_component}.")[1] + after_main_split = after_main.split(".")[:-1] + # first, find the group in main that is simulatenous + if main_shortname not in timeline[i]: + print(f"Error: A group from the main component ({main_shortname}) should be active at cycle {i}!") + exit(1) + backptrs = [main_component] + group_from_main = timeline[i][main_shortname].split(main_component + ".")[-1] + backptrs.append(group_from_main) + prev_component = main_shortname + for cell_name in after_main_split: + cell_component = cells_map[prev_component][cell_name] + group_from_component = timeline[i][cell_component].split(cell_name + ".")[-1] + backptrs.append(f"{cell_component}[{prev_component}.{cell_name}];{group_from_component}") + prev_component = cell_component + stack = ";".join(backptrs) + if stack not in frequency_stacks: + frequency_stacks[stack] = 0 + frequency_stacks[stack] += 1 + i = group_to_gt_segments[group_full_name][i] # the next segment to check starts at the end time of this segment + + write_flame_graph(frequency_flame_out, frequency_stacks) + +# attempt to rehash the create_flame_graph to take care of stacks +def create_flame_graph(main_component, cells_map, timeline, fsm_timeline, flame_out, fsm_flame_out): + stacks 
= compute_flame_stacks(cells_map, timeline, main_component) + write_flame_graph(flame_out, stacks) + fsm_stacks = compute_flame_stacks(cells_map, fsm_timeline, main_component) + write_flame_graph(fsm_flame_out, fsm_stacks) + +def create_timeline_stacks(timeline, main_component): + events = [] + currently_active = {} # group name to beginning traceEvent entry (so end event can copy) + ts_multiplier = 100 # some arbitrary number to multiply by so that it's easier to see in the viewer + cell_to_stackframe_info = {main_component : (2, 1)} # (stack_number, parent_stack_number) + stack_number_acc = 3 # To guarantee that we get unique stack numbers when we need a new one + + # Beginning and end events for main signify the overall running time (stack 1) + main_event_details = {"name": main_component, "sf": 1, "cat": "MAIN", "pid": 1, "tid": 1} + main_start = main_event_details.copy() + main_start["ts"] = 0 + main_start["ph"] = "B" + events.append(main_start) + main_end = main_event_details.copy() + main_end["ts"] = len(timeline) * ts_multiplier + main_end["ph"] = "E" + events.append(main_end) + cell_to_stackframe_info["MAIN"] = (1, None) + + for i in timeline: + active_this_cycle = set() + # Differently from compute_flame_stacks, we start from the bottom up. (easier to see parent) + sorted_active_groups = list(sorted(timeline[i], key=lambda k : timeline[i][k].count("."))) + for group_component in sorted_active_groups: + group_full_name = timeline[i][group_component] + active_this_cycle.add(group_full_name) + if group_full_name not in currently_active: # first cycle of the group. 
We need to figure out the stack + group_split = group_full_name.split(".") + group_cell = ".".join(group_split[:-1]) + group_shortname = group_split[-1] + stackframe = -1 # FIXME: find the appropriate stack frame + if group_cell in cell_to_stackframe_info: + (stackframe, _) = cell_to_stackframe_info[main_component] + else: + # Since we are iterating from the shortest to longest name (based on cell counts) + # The group's cell's parent *must* be in cell_to_stackframe_info + group_cell_parent = ".".join(group_split[:-2]) + (parent_stackframe, _) = cell_to_stackframe_info[group_cell_parent] + stackframe = stack_number_acc + stack_number_acc += 1 + cell_to_stackframe_info[group_cell] = (stackframe, parent_stackframe) + start_event = {"name": group_shortname, "cat": group_component, "ph": "B", "pid" : 1, "tid": 1, "ts": i * ts_multiplier, "sf" : stackframe} + events.append(start_event) + currently_active[group_full_name] = start_event + # Any group that was previously active but not active this cycle need to end + for non_active_group in set(currently_active.keys()).difference(active_this_cycle): + end_event = currently_active[non_active_group].copy() + del currently_active[non_active_group] + end_event["ts"] = (i) * ts_multiplier - 1 + end_event["ph"] = "E" + events.append(end_event) + # postprocess - add end events for all events still active by the end + for event in currently_active: + end_event = currently_active[event].copy() + end_event["ts"] = (len(timeline)) * ts_multiplier - 1 # only difference w the above + end_event["ph"] = "E" + events.append(end_event) + + # "stackFrames" field of the Trace Format JSON + stacks = {} + stack_category = "C" + for cell in cell_to_stackframe_info: + stack_id, parent_stack_id = cell_to_stackframe_info[cell] + if parent_stack_id is None: + stacks[stack_id] = {"name" : "MAIN", "category": stack_category} + else: + stacks[stack_id] = {"name" : cell, "parent": parent_stack_id, "category" : stack_category} + + return { 
"traceEvents": events, "stackFrames": stacks } + +def create_timeline_json(timeline, fsm_timeline, main_component, timeline_out, fsm_timeline_out): + timeline_json_data = create_timeline_stacks(timeline, main_component) + with open(timeline_out, "w", encoding="utf-8") as timeline_file: + timeline_file.write(json.dumps(timeline_json_data, indent=4)) + fsm_timeline_json_data = create_timeline_stacks(fsm_timeline, main_component) + with open(fsm_timeline_out, "w", encoding="utf-8") as fsm_timeline_file: + fsm_timeline_file.write(json.dumps(fsm_timeline_json_data, indent=4)) + + +def compute_flame_stacks(cells_map, timeline, main_component): + main_shortname = main_component.split("TOP.toplevel.")[1] + stacks = {} # each stack to the # of cycles it was active for + nonactive_cycles = 0 # cycles where no group was active + for i in timeline: # keys in the timeline are clock time stamps + # Right now we are assuming that there are no pars. So for any time stamp, *if there are multiple* groups active, + # we need to find the one that is the longest (since that's the innermost one). + # NOTE: This might be generalizable for even the 1 group active case? 
Going to try it out + if len(timeline[i]) == 0: + nonactive_cycles += 1 + continue + group_component = sorted(timeline[i], key=lambda k : timeline[i][k].count("."), reverse=True)[0] + group_full_name = timeline[i][group_component] + stack = "" + group_name = group_full_name.split(".")[-1] + if group_component == main_shortname: + stack = main_component + ";" + group_name + else: + after_main = group_full_name.split(f"{main_component}.")[1] + after_main_split = after_main.split(".")[:-1] + # first, find the group in main that is simulatenous + if main_shortname not in timeline[i]: + print(f"Error: A group from the main component ({main_shortname}) should be active at cycle {i}!") + exit(1) + backptrs = [main_component] + group_from_main = timeline[i][main_shortname].split(main_component + ".")[-1] + backptrs.append(group_from_main) + prev_component = main_shortname + for cell_name in after_main_split: + cell_component = cells_map[prev_component][cell_name] + group_from_component = timeline[i][cell_component].split(cell_name + ".")[-1] + backptrs.append(f"{cell_component}[{prev_component}.{cell_name}];{group_from_component}") + prev_component = cell_component + stack = ";".join(backptrs) + + if stack not in stacks: + stacks[stack] = 0 + stacks[stack] += 1 + + stacks[main_component] = nonactive_cycles + return stacks + +def write_flame_graph(flame_out, stacks): + with open(flame_out, "w") as f: + for stack in sorted(stacks, key=lambda k : len(k)): # main needs to come first for flame graph script to not make two boxes for main? + f.write(f"{stack} {stacks[stack]}\n") + +# Starting with the JSON array format for now... 
[Needs to be fixed] +# example +# [ {"name": "Asub", "cat": "PERF", "ph": "B", "pid": 22630, "tid": 22630, "ts": 829}, +# {"name": "Asub", "cat": "PERF", "ph": "E", "pid": 22630, "tid": 22630, "ts": 833} ] +def create_timeline_view(profiled_info, out_file): + cat = "GT" # Ground truth category (will overwrite if it's FSM) + events = [] + id_acc = 1 + ts_multiplier = 100 # some arbitrary number to multiply by so that it's easier to see in the viewer + for group_info in profiled_info: + if group_info["name"] == "TOTAL": # timeline view doesn't need a total time + continue + name = group_info["name"].split("TOP.toplevel.", 1)[1] + if group_info["fsm_name"] is not None: + cat = "FSM" + name = "[FSM] " + name + for segment in group_info["closed_segments"]: + # beginning of segment + begin_time = segment["start"] * ts_multiplier + events.append({"name": name, "cat": cat, "ph": "B", "pid" : id_acc, "tid": id_acc, "ts" : begin_time}) + # end of segment + end_time = segment["end"] * ts_multiplier + events.append({"name": name, "cat": cat, "ph": "E", "pid": id_acc, "tid": id_acc, "ts": end_time}) + id_acc += 1 + with open(out_file, "w") as out: + json.dump(events, out, indent=4) + +def build_cells_map(json_file): + cell_json = json.load(open(json_file)) + cells_map = {} + for component_entry in cell_json: + inner_cells_map = {} + for cell_entry in component_entry["cell_info"]: + inner_cells_map[cell_entry["cell_name"]] = cell_entry["component_name"] + cells_map[component_entry["component"]] = inner_cells_map + return cells_map + +def main(profiler_dump_file, cells_json, timeline_out, fsm_timeline_out, flame_out, fsm_flame_out, frequency_flame_out): + profiled_info = json.load(open(profiler_dump_file, "r")) + fsm_groups, all_groups = get_fsm_groups(profiled_info) + # This cells_map is different from the one in parse-vcd.py + cells_map = build_cells_map(cells_json) + timeline, fsm_timeline, group_to_gt_segments = create_timeline_map(profiled_info, fsm_groups, all_groups) + 
summary = list(filter(lambda x : x["name"] == "TOTAL", profiled_info))[0] + main_component = summary["main_full_path"] + create_flame_graph(main_component, cells_map, timeline, fsm_timeline, flame_out, fsm_flame_out) + create_timeline_json(timeline, fsm_timeline, main_component, timeline_out, fsm_timeline_out) + create_frequency_flame_graph(main_component, cells_map, timeline, group_to_gt_segments, frequency_flame_out) + +if __name__ == "__main__": + if len(sys.argv) > 7: + profiler_dump_json = sys.argv[1] + cells_json = sys.argv[2] + timeline_out = sys.argv[3] + fsm_timeline_out = sys.argv[4] + flame_out = sys.argv[5] + fsm_flame_out = sys.argv[6] + frequency_flame_out = sys.argv[7] + main(profiler_dump_json, cells_json, timeline_out, fsm_timeline_out, flame_out, fsm_flame_out, frequency_flame_out) + else: + args_desc = [ + "PROFILER_JSON", + "CELLS_JSON", + "TIMELINE_VIEW_JSON", + "FSM_TIMELINE_VIEW_JSON", + "FLAME_GRAPH_FOLDED", + "FSM_FLAME_GRAPH_FOLDED", + "FREQUENCY_FLAME_GRAPH_FOLDED" + ] + print(f"Usage: {sys.argv[0]} {' '.join(args_desc)}") + sys.exit(-1) diff --git a/tools/profiler/get-profile-counts-info.sh b/tools/profiler/get-profile-counts-info.sh index e796c8e83..61fa78a80 100644 --- a/tools/profiler/get-profile-counts-info.sh +++ b/tools/profiler/get-profile-counts-info.sh @@ -20,11 +20,24 @@ else OUT_CSV=${TMP_DIR}/summary.csv fi +FLAMEGRAPH_DIR=${SCRIPT_DIR}/fg-tmp + +if [ ! 
-d ${FLAMEGRAPH_DIR} ]; then + ( + cd ${SCRIPT_DIR} + git clone git@github.com:brendangregg/FlameGraph.git fg-tmp + ) +fi + TMP_VERILOG=${TMP_DIR}/no-opt-verilog.sv FSM_JSON=${TMP_DIR}/fsm.json CELLS_JSON=${TMP_DIR}/cells.json OUT_JSON=${TMP_DIR}/dump.json -VISUALS_JSON=${TMP_DIR}/visual.json +TIMELINE_VIEW_JSON=${TMP_DIR}/timeline.json +FSM_TIMELINE_VIEW_JSON=${TMP_DIR}/fsm-timeline.json +FLAME_GRAPH_FOLDED=${TMP_DIR}/flame.folded +FSM_FLAME_GRAPH_FOLDED=${TMP_DIR}/fsm-flame.folded +FREQUENCY_FLAME_GRAPH_FOLDED=${TMP_DIR}/frequency-flame.folded VCD_FILE=${TMP_DIR}/trace.vcd LOGS_DIR=${DATA_DIR}/logs if [ -d ${DATA_DIR} ]; then @@ -39,10 +52,15 @@ echo "[${SCRIPT_NAME}] Obtaining FSM info from TDCC" ( cd ${CALYX_DIR} set -o xtrace - cargo run -- ${INPUT_FILE} -p no-opt -x tdcc:dump-fsm-json="${FSM_JSON}" + cargo run -- ${INPUT_FILE} -p compile-repeat -p no-opt -x tdcc:dump-fsm-json="${FSM_JSON}" # -p par-to-seq set +o xtrace ) &> ${LOGS_DIR}/gol-tdcc +if [ ! -f ${FSM_JSON} ]; then + echo "[${SCRIPT_NAME}] Failed to generate ${FSM_JSON}! Exiting" + exit 1 +fi + # Run component-cells backend to get cell information echo "[${SCRIPT_NAME}] Obtaining cell information from component-cells backend" ( @@ -51,27 +69,59 @@ echo "[${SCRIPT_NAME}] Obtaining cell information from component-cells backend" cargo run --manifest-path tools/component_cells/Cargo.toml ${INPUT_FILE} -o ${CELLS_JSON} ) &> ${LOGS_DIR}/gol-cells +if [ ! -f ${CELLS_JSON} ]; then + echo "[${SCRIPT_NAME}] Failed to generate ${CELLS_JSON}! Exiting" + exit 1 +fi + # Run simuation to get VCD echo "[${SCRIPT_NAME}] Obtaining VCD file via simulation" ( set -o xtrace - fud2 ${INPUT_FILE} -o ${VCD_FILE} --through verilator -s calyx.args='-p no-opt' -s sim.data=${SIM_DATA_JSON} + fud2 ${INPUT_FILE} -o ${VCD_FILE} --through verilator -s calyx.args='-p compile-repeat -p no-opt' -s sim.data=${SIM_DATA_JSON} # -p par-to-seq set +o xtrace ) &> ${LOGS_DIR}/gol-vcd +if [ ! 
-f ${VCD_FILE} ]; then + echo "[${SCRIPT_NAME}] Failed to generate ${VCD_FILE}! Exiting" + exit 1 +fi + # Run script to get cycle level counts echo "[${SCRIPT_NAME}] Using FSM info and VCD file to obtain cycle level counts" ( + set -o xtrace python3 ${SCRIPT_DIR}/parse-vcd.py ${VCD_FILE} ${FSM_JSON} ${CELLS_JSON} ${OUT_CSV} ${OUT_JSON} + set +o xtrace ) &> ${LOGS_DIR}/gol-process if [ "$4" == "-d" ]; then - cat ${LOGS_DIR}/gol-process | grep -v Writing + cat ${LOGS_DIR}/gol-process | grep -v Writing # exclude lines that show paths else - tail -2 ${LOGS_DIR}/gol-process + tail -3 ${LOGS_DIR}/gol-process | head -2 # last line is the set +o xtrace, which we don't need to show fi echo "[${SCRIPT_NAME}] Writing visualization" ( - python3 ${SCRIPT_DIR}/convert-dump.py ${OUT_JSON} ${VISUALS_JSON} + set -o xtrace + python3 ${SCRIPT_DIR}/create-visuals.py ${OUT_JSON} ${CELLS_JSON} ${TIMELINE_VIEW_JSON} ${FSM_TIMELINE_VIEW_JSON} ${FLAME_GRAPH_FOLDED} ${FSM_FLAME_GRAPH_FOLDED} ${FREQUENCY_FLAME_GRAPH_FOLDED} + set +o xtrace ) &> ${LOGS_DIR}/gol-visuals + +echo "[${SCRIPT_NAME}] Creating flame graph svg" +( + set -o xtrace + for opt in "" "--inverted" "--reverse"; do + if [ "${opt}" == "" ]; then + filename=flame + else + filename=flame"${opt:1}" + fi + ${FLAMEGRAPH_DIR}/flamegraph.pl ${opt} --countname="cycles" ${FLAME_GRAPH_FOLDED} > ${TMP_DIR}/${filename}.svg + echo + ${FLAMEGRAPH_DIR}/flamegraph.pl ${opt} --countname="cycles" ${FSM_FLAME_GRAPH_FOLDED} > ${TMP_DIR}/fsm-${filename}.svg + echo + ${FLAMEGRAPH_DIR}/flamegraph.pl ${opt} --countname="times active" ${FREQUENCY_FLAME_GRAPH_FOLDED} > ${TMP_DIR}/frequency-${filename}.svg + done + set +o xtrace +) &> ${LOGS_DIR}/gol-flamegraph diff --git a/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/flame.folded b/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/flame.folded new file mode 100644 index 000000000..1db061a2e --- /dev/null +++ b/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/flame.folded @@ 
-0,0 +1,24 @@ +TOP.toplevel.main 772 +TOP.toplevel.main;for0;L0;init(i) 1 +TOP.toplevel.main;for0;L1 8 +TOP.toplevel.main;for0;L0;upd(i) 8 +TOP.toplevel.main;for1;L2;init(i) 1 +TOP.toplevel.main;for1;L3 8 +TOP.toplevel.main;for1;for2;L4;init(j) 8 +TOP.toplevel.main;for1;for2;L5 128 +TOP.toplevel.main;for1;for2;L6 128 +TOP.toplevel.main;for1;for2;L7 128 +TOP.toplevel.main;for1;for2;L8;read(r_int[i]) 128 +TOP.toplevel.main;for1;for2;L8;mult(r_int[i]*A_i_j) 192 +TOP.toplevel.main;for1;for2;L8;upd(s_int[j]) 64 +TOP.toplevel.main;for1;for2;L9;read(p_int[j]) 128 +TOP.toplevel.main;for1;for2;L9;mult(A_i_j*p_int[j]) 192 +TOP.toplevel.main;for1;for2;L9;upd(q_int[i]) 64 +TOP.toplevel.main;for1;for2;L4;upd(j) 64 +TOP.toplevel.main;for1;L2;upd(i) 8 +TOP.toplevel.main;for0;L0;init(repeat) 1 +TOP.toplevel.main;for0;L0;incr(repeat) 8 +TOP.toplevel.main;for1;L2;init(repeat) 1 +TOP.toplevel.main;for1;for2;L4;init(repeat) 8 +TOP.toplevel.main;for1;for2;L4;incr(repeat) 64 +TOP.toplevel.main;for1;L2;incr(repeat) 8 diff --git a/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/line-by-line.folded b/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/line-by-line.folded new file mode 100644 index 000000000..8fa3cb9d3 --- /dev/null +++ b/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/line-by-line.folded @@ -0,0 +1,24 @@ +TOP.toplevel.main 772 +TOP.toplevel.main;L0;init(i) 1 +TOP.toplevel.main;L1 8 +TOP.toplevel.main;L0;upd(i) 8 +TOP.toplevel.main;L2;init(i) 1 +TOP.toplevel.main;L3 8 +TOP.toplevel.main;L4;init(j) 8 +TOP.toplevel.main;L5 128 +TOP.toplevel.main;L6 128 +TOP.toplevel.main;L7 128 +TOP.toplevel.main;L8;read(r_int[i]) 128 +TOP.toplevel.main;L8;mult(r_int[i]*A_i_j) 192 +TOP.toplevel.main;L8;upd(s_int[j]) 64 +TOP.toplevel.main;L9;read(p_int[j]) 128 +TOP.toplevel.main;L9;mult(A_i_j*p_int[j]) 192 +TOP.toplevel.main;L9;upd(q_int[i]) 64 +TOP.toplevel.main;L4;upd(j) 64 +TOP.toplevel.main;L2;upd(i) 8 +TOP.toplevel.main;L0;init(repeat) 1 
+TOP.toplevel.main;L0;incr(repeat) 8 +TOP.toplevel.main;L2;init(repeat) 1 +TOP.toplevel.main;L4;init(repeat) 8 +TOP.toplevel.main;L4;incr(repeat) 64 +TOP.toplevel.main;L2;incr(repeat) 8 diff --git a/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/original-flame.folded b/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/original-flame.folded new file mode 100644 index 000000000..0d0c7aade --- /dev/null +++ b/tools/profiler/handmade-flame-graphs/linear-algebra-bicg/original-flame.folded @@ -0,0 +1,24 @@ +TOP.toplevel.main 772 +TOP.toplevel.main;let0 1 +TOP.toplevel.main;upd0 8 +TOP.toplevel.main;upd1 8 +TOP.toplevel.main;let1 1 +TOP.toplevel.main;upd2 8 +TOP.toplevel.main;let2 8 +TOP.toplevel.main;let3 128 +TOP.toplevel.main;let4 128 +TOP.toplevel.main;let5 128 +TOP.toplevel.main;let6 128 +TOP.toplevel.main;let7 192 +TOP.toplevel.main;upd3 64 +TOP.toplevel.main;let8 128 +TOP.toplevel.main;let9 192 +TOP.toplevel.main;upd4 64 +TOP.toplevel.main;upd5 64 +TOP.toplevel.main;upd6 8 +TOP.toplevel.main;init_repeat 1 +TOP.toplevel.main;incr_repeat 8 +TOP.toplevel.main;init_repeat1 1 +TOP.toplevel.main;init_repeat0 8 +TOP.toplevel.main;incr_repeat0 64 +TOP.toplevel.main;incr_repeat1 8 diff --git a/tools/profiler/handmade-flame-graphs/ntt-32/flame.folded b/tools/profiler/handmade-flame-graphs/ntt-32/flame.folded new file mode 100644 index 000000000..c44762924 --- /dev/null +++ b/tools/profiler/handmade-flame-graphs/ntt-32/flame.folded @@ -0,0 +1,337 @@ +TOP.toplevel.main 432 +TOP.toplevel.main;s0_mul;s0_mul9 2 +TOP.toplevel.main;s0_mul;s0_mul8 2 +TOP.toplevel.main;s0_mul;s0_mul7 2 +TOP.toplevel.main;s0_mul;s0_mul6 2 +TOP.toplevel.main;s0_mul;s0_mul5 2 +TOP.toplevel.main;s0_mul;s0_mul4 2 +TOP.toplevel.main;s0_mul;s0_mul3 2 +TOP.toplevel.main;s0_mul;s0_mul2 2 +TOP.toplevel.main;s0_mul;s0_mul1 2 +TOP.toplevel.main;s0_mul;s0_mul0 2 +TOP.toplevel.main;s1_mul;s1_mul9 2 +TOP.toplevel.main;s1_mul;s1_mul8 2 +TOP.toplevel.main;s1_mul;s1_mul7 2 
+TOP.toplevel.main;s1_mul;s1_mul6 2 +TOP.toplevel.main;s1_mul;s1_mul5 2 +TOP.toplevel.main;s1_mul;s1_mul4 2 +TOP.toplevel.main;s1_mul;s1_mul3 2 +TOP.toplevel.main;s1_mul;s1_mul2 2 +TOP.toplevel.main;s1_mul;s1_mul1 2 +TOP.toplevel.main;s1_mul;s1_mul0 2 +TOP.toplevel.main;s2_mul;s2_mul9 2 +TOP.toplevel.main;s2_mul;s2_mul8 2 +TOP.toplevel.main;s2_mul;s2_mul7 2 +TOP.toplevel.main;s2_mul;s2_mul6 2 +TOP.toplevel.main;s2_mul;s2_mul5 2 +TOP.toplevel.main;s2_mul;s2_mul4 2 +TOP.toplevel.main;s2_mul;s2_mul3 2 +TOP.toplevel.main;s2_mul;s2_mul2 2 +TOP.toplevel.main;s2_mul;s2_mul1 2 +TOP.toplevel.main;s2_mul;s2_mul0 2 +TOP.toplevel.main;s3_mul;s3_mul9 2 +TOP.toplevel.main;s3_mul;s3_mul8 2 +TOP.toplevel.main;s3_mul;s3_mul7 2 +TOP.toplevel.main;s3_mul;s3_mul6 2 +TOP.toplevel.main;s3_mul;s3_mul5 2 +TOP.toplevel.main;s3_mul;s3_mul4 2 +TOP.toplevel.main;s3_mul;s3_mul3 2 +TOP.toplevel.main;s3_mul;s3_mul2 2 +TOP.toplevel.main;s3_mul;s3_mul1 2 +TOP.toplevel.main;s3_mul;s3_mul0 2 +TOP.toplevel.main;s4_mul;s4_mul9 2 +TOP.toplevel.main;s4_mul;s4_mul8 2 +TOP.toplevel.main;s4_mul;s4_mul7 2 +TOP.toplevel.main;s4_mul;s4_mul6 2 +TOP.toplevel.main;s4_mul;s4_mul5 2 +TOP.toplevel.main;s4_mul;s4_mul4 2 +TOP.toplevel.main;s4_mul;s4_mul3 2 +TOP.toplevel.main;s4_mul;s4_mul2 2 +TOP.toplevel.main;s4_mul;s4_mul1 2 +TOP.toplevel.main;s4_mul;s4_mul0 2 +TOP.toplevel.main;s0_mul;s0_mul15 2 +TOP.toplevel.main;s0_mul;s0_mul14 2 +TOP.toplevel.main;s0_mul;s0_mul13 2 +TOP.toplevel.main;s0_mul;s0_mul12 2 +TOP.toplevel.main;s0_mul;s0_mul11 2 +TOP.toplevel.main;s0_mul;s0_mul10 2 +TOP.toplevel.main;s1_mul;s1_mul15 2 +TOP.toplevel.main;s1_mul;s1_mul14 2 +TOP.toplevel.main;s1_mul;s1_mul13 2 +TOP.toplevel.main;s1_mul;s1_mul12 2 +TOP.toplevel.main;s1_mul;s1_mul11 2 +TOP.toplevel.main;s1_mul;s1_mul10 2 +TOP.toplevel.main;s2_mul;s2_mul15 2 +TOP.toplevel.main;s2_mul;s2_mul14 2 +TOP.toplevel.main;s2_mul;s2_mul13 2 +TOP.toplevel.main;s2_mul;s2_mul12 2 +TOP.toplevel.main;s2_mul;s2_mul11 2 +TOP.toplevel.main;s2_mul;s2_mul10 2 
+TOP.toplevel.main;s3_mul;s3_mul15 2 +TOP.toplevel.main;s3_mul;s3_mul14 2 +TOP.toplevel.main;s3_mul;s3_mul13 2 +TOP.toplevel.main;s3_mul;s3_mul12 2 +TOP.toplevel.main;s3_mul;s3_mul11 2 +TOP.toplevel.main;s3_mul;s3_mul10 2 +TOP.toplevel.main;s4_mul;s4_mul15 2 +TOP.toplevel.main;s4_mul;s4_mul14 2 +TOP.toplevel.main;s4_mul;s4_mul13 2 +TOP.toplevel.main;s4_mul;s4_mul12 2 +TOP.toplevel.main;s4_mul;s4_mul11 2 +TOP.toplevel.main;s4_mul;s4_mul10 2 +TOP.toplevel.main;preamble;preamble_0 1 +TOP.toplevel.main;preamble;preamble_1 1 +TOP.toplevel.main;preamble;preamble_2 1 +TOP.toplevel.main;preamble;preamble_3 1 +TOP.toplevel.main;preamble;preamble_4 1 +TOP.toplevel.main;preamble;preamble_5 1 +TOP.toplevel.main;preamble;preamble_6 1 +TOP.toplevel.main;preamble;preamble_7 1 +TOP.toplevel.main;preamble;preamble_8 1 +TOP.toplevel.main;preamble;preamble_9 1 +TOP.toplevel.main;epilogue;epilogue_0 1 +TOP.toplevel.main;epilogue;epilogue_1 1 +TOP.toplevel.main;epilogue;epilogue_2 1 +TOP.toplevel.main;epilogue;epilogue_3 1 +TOP.toplevel.main;epilogue;epilogue_4 1 +TOP.toplevel.main;epilogue;epilogue_5 1 +TOP.toplevel.main;epilogue;epilogue_6 1 +TOP.toplevel.main;epilogue;epilogue_7 1 +TOP.toplevel.main;epilogue;epilogue_8 1 +TOP.toplevel.main;epilogue;epilogue_9 1 +TOP.toplevel.main;preamble;preamble_10 1 +TOP.toplevel.main;preamble;preamble_11 1 +TOP.toplevel.main;preamble;preamble_12 1 +TOP.toplevel.main;preamble;preamble_13 1 +TOP.toplevel.main;preamble;preamble_14 1 +TOP.toplevel.main;preamble;preamble_15 1 +TOP.toplevel.main;preamble;preamble_16 1 +TOP.toplevel.main;preamble;preamble_17 1 +TOP.toplevel.main;preamble;preamble_18 1 +TOP.toplevel.main;preamble;preamble_19 1 +TOP.toplevel.main;preamble;preamble_20 1 +TOP.toplevel.main;preamble;preamble_21 1 +TOP.toplevel.main;preamble;preamble_22 1 +TOP.toplevel.main;preamble;preamble_23 1 +TOP.toplevel.main;preamble;preamble_24 1 +TOP.toplevel.main;preamble;preamble_25 1 +TOP.toplevel.main;preamble;preamble_26 1 
+TOP.toplevel.main;preamble;preamble_27 1 +TOP.toplevel.main;preamble;preamble_28 1 +TOP.toplevel.main;preamble;preamble_29 1 +TOP.toplevel.main;preamble;preamble_30 1 +TOP.toplevel.main;preamble;preamble_31 1 +TOP.toplevel.main;precursor;precursor_9 4 +TOP.toplevel.main;precursor;precursor_8 4 +TOP.toplevel.main;precursor;precursor_7 4 +TOP.toplevel.main;precursor;precursor_6 4 +TOP.toplevel.main;precursor;precursor_5 4 +TOP.toplevel.main;precursor;precursor_4 4 +TOP.toplevel.main;precursor;precursor_3 4 +TOP.toplevel.main;precursor;precursor_2 4 +TOP.toplevel.main;precursor;precursor_1 4 +TOP.toplevel.main;precursor;precursor_0 4 +TOP.toplevel.main;epilogue;epilogue_10 1 +TOP.toplevel.main;epilogue;epilogue_11 1 +TOP.toplevel.main;epilogue;epilogue_12 1 +TOP.toplevel.main;epilogue;epilogue_13 1 +TOP.toplevel.main;epilogue;epilogue_14 1 +TOP.toplevel.main;epilogue;epilogue_15 1 +TOP.toplevel.main;epilogue;epilogue_16 1 +TOP.toplevel.main;epilogue;epilogue_17 1 +TOP.toplevel.main;epilogue;epilogue_18 1 +TOP.toplevel.main;epilogue;epilogue_19 1 +TOP.toplevel.main;epilogue;epilogue_20 1 +TOP.toplevel.main;epilogue;epilogue_21 1 +TOP.toplevel.main;epilogue;epilogue_22 1 +TOP.toplevel.main;epilogue;epilogue_23 1 +TOP.toplevel.main;epilogue;epilogue_24 1 +TOP.toplevel.main;epilogue;epilogue_25 1 +TOP.toplevel.main;epilogue;epilogue_26 1 +TOP.toplevel.main;epilogue;epilogue_27 1 +TOP.toplevel.main;epilogue;epilogue_28 1 +TOP.toplevel.main;epilogue;epilogue_29 1 +TOP.toplevel.main;epilogue;epilogue_30 1 +TOP.toplevel.main;epilogue;epilogue_31 1 +TOP.toplevel.main;s0_r9_op_mod 35 +TOP.toplevel.main;s0_r8_op_mod 35 +TOP.toplevel.main;s0_r7_op_mod 35 +TOP.toplevel.main;s0_r6_op_mod 35 +TOP.toplevel.main;s0_r5_op_mod 35 +TOP.toplevel.main;s0_r4_op_mod 35 +TOP.toplevel.main;s0_r3_op_mod 35 +TOP.toplevel.main;s0_r2_op_mod 35 +TOP.toplevel.main;s0_r1_op_mod 35 +TOP.toplevel.main;s0_r0_op_mod 35 +TOP.toplevel.main;precursor;precursor_31 4 +TOP.toplevel.main;precursor;precursor_30 
4 +TOP.toplevel.main;precursor;precursor_29 4 +TOP.toplevel.main;precursor;precursor_28 4 +TOP.toplevel.main;precursor;precursor_27 4 +TOP.toplevel.main;precursor;precursor_26 4 +TOP.toplevel.main;precursor;precursor_25 4 +TOP.toplevel.main;precursor;precursor_24 4 +TOP.toplevel.main;precursor;precursor_23 4 +TOP.toplevel.main;precursor;precursor_22 4 +TOP.toplevel.main;precursor;precursor_21 4 +TOP.toplevel.main;precursor;precursor_20 4 +TOP.toplevel.main;precursor;precursor_19 4 +TOP.toplevel.main;precursor;precursor_18 4 +TOP.toplevel.main;precursor;precursor_17 4 +TOP.toplevel.main;precursor;precursor_16 4 +TOP.toplevel.main;precursor;precursor_15 4 +TOP.toplevel.main;precursor;precursor_14 4 +TOP.toplevel.main;precursor;precursor_13 4 +TOP.toplevel.main;precursor;precursor_12 4 +TOP.toplevel.main;precursor;precursor_11 4 +TOP.toplevel.main;precursor;precursor_10 4 +TOP.toplevel.main;s1_r9_op_mod 35 +TOP.toplevel.main;s1_r8_op_mod 35 +TOP.toplevel.main;s1_r7_op_mod 35 +TOP.toplevel.main;s1_r6_op_mod 35 +TOP.toplevel.main;s1_r5_op_mod 35 +TOP.toplevel.main;s1_r4_op_mod 35 +TOP.toplevel.main;s1_r3_op_mod 35 +TOP.toplevel.main;s1_r2_op_mod 35 +TOP.toplevel.main;s1_r1_op_mod 35 +TOP.toplevel.main;s1_r0_op_mod 35 +TOP.toplevel.main;s2_r9_op_mod 35 +TOP.toplevel.main;s2_r8_op_mod 35 +TOP.toplevel.main;s2_r7_op_mod 35 +TOP.toplevel.main;s2_r6_op_mod 35 +TOP.toplevel.main;s2_r5_op_mod 35 +TOP.toplevel.main;s2_r4_op_mod 35 +TOP.toplevel.main;s2_r3_op_mod 35 +TOP.toplevel.main;s2_r2_op_mod 35 +TOP.toplevel.main;s2_r1_op_mod 35 +TOP.toplevel.main;s2_r0_op_mod 35 +TOP.toplevel.main;s3_r9_op_mod 35 +TOP.toplevel.main;s3_r8_op_mod 35 +TOP.toplevel.main;s3_r7_op_mod 35 +TOP.toplevel.main;s3_r6_op_mod 35 +TOP.toplevel.main;s3_r5_op_mod 35 +TOP.toplevel.main;s3_r4_op_mod 35 +TOP.toplevel.main;s3_r3_op_mod 35 +TOP.toplevel.main;s3_r2_op_mod 35 +TOP.toplevel.main;s3_r1_op_mod 35 +TOP.toplevel.main;s3_r0_op_mod 35 +TOP.toplevel.main;s4_r9_op_mod 35 +TOP.toplevel.main;s4_r8_op_mod 
35 +TOP.toplevel.main;s4_r7_op_mod 35 +TOP.toplevel.main;s4_r6_op_mod 35 +TOP.toplevel.main;s4_r5_op_mod 35 +TOP.toplevel.main;s4_r4_op_mod 35 +TOP.toplevel.main;s4_r3_op_mod 35 +TOP.toplevel.main;s4_r2_op_mod 35 +TOP.toplevel.main;s4_r1_op_mod 35 +TOP.toplevel.main;s4_r0_op_mod 35 +TOP.toplevel.main;s0_r31_op_mod 35 +TOP.toplevel.main;s0_r30_op_mod 35 +TOP.toplevel.main;s0_r29_op_mod 35 +TOP.toplevel.main;s0_r28_op_mod 35 +TOP.toplevel.main;s0_r27_op_mod 35 +TOP.toplevel.main;s0_r26_op_mod 35 +TOP.toplevel.main;s0_r25_op_mod 35 +TOP.toplevel.main;s0_r24_op_mod 35 +TOP.toplevel.main;s0_r23_op_mod 35 +TOP.toplevel.main;s0_r22_op_mod 35 +TOP.toplevel.main;s0_r21_op_mod 35 +TOP.toplevel.main;s0_r20_op_mod 35 +TOP.toplevel.main;s0_r19_op_mod 35 +TOP.toplevel.main;s0_r18_op_mod 35 +TOP.toplevel.main;s0_r17_op_mod 35 +TOP.toplevel.main;s0_r16_op_mod 35 +TOP.toplevel.main;s0_r15_op_mod 35 +TOP.toplevel.main;s0_r14_op_mod 35 +TOP.toplevel.main;s0_r13_op_mod 35 +TOP.toplevel.main;s0_r12_op_mod 35 +TOP.toplevel.main;s0_r11_op_mod 35 +TOP.toplevel.main;s0_r10_op_mod 35 +TOP.toplevel.main;s1_r31_op_mod 35 +TOP.toplevel.main;s1_r30_op_mod 35 +TOP.toplevel.main;s1_r29_op_mod 35 +TOP.toplevel.main;s1_r28_op_mod 35 +TOP.toplevel.main;s1_r27_op_mod 35 +TOP.toplevel.main;s1_r26_op_mod 35 +TOP.toplevel.main;s1_r25_op_mod 35 +TOP.toplevel.main;s1_r24_op_mod 35 +TOP.toplevel.main;s1_r23_op_mod 35 +TOP.toplevel.main;s1_r22_op_mod 35 +TOP.toplevel.main;s1_r21_op_mod 35 +TOP.toplevel.main;s1_r20_op_mod 35 +TOP.toplevel.main;s1_r19_op_mod 35 +TOP.toplevel.main;s1_r18_op_mod 35 +TOP.toplevel.main;s1_r17_op_mod 35 +TOP.toplevel.main;s1_r16_op_mod 35 +TOP.toplevel.main;s1_r15_op_mod 35 +TOP.toplevel.main;s1_r14_op_mod 35 +TOP.toplevel.main;s1_r13_op_mod 35 +TOP.toplevel.main;s1_r12_op_mod 35 +TOP.toplevel.main;s1_r11_op_mod 35 +TOP.toplevel.main;s1_r10_op_mod 35 +TOP.toplevel.main;s2_r31_op_mod 35 +TOP.toplevel.main;s2_r30_op_mod 35 +TOP.toplevel.main;s2_r29_op_mod 35 
+TOP.toplevel.main;s2_r28_op_mod 35 +TOP.toplevel.main;s2_r27_op_mod 35 +TOP.toplevel.main;s2_r26_op_mod 35 +TOP.toplevel.main;s2_r25_op_mod 35 +TOP.toplevel.main;s2_r24_op_mod 35 +TOP.toplevel.main;s2_r23_op_mod 35 +TOP.toplevel.main;s2_r22_op_mod 35 +TOP.toplevel.main;s2_r21_op_mod 35 +TOP.toplevel.main;s2_r20_op_mod 35 +TOP.toplevel.main;s2_r19_op_mod 35 +TOP.toplevel.main;s2_r18_op_mod 35 +TOP.toplevel.main;s2_r17_op_mod 35 +TOP.toplevel.main;s2_r16_op_mod 35 +TOP.toplevel.main;s2_r15_op_mod 35 +TOP.toplevel.main;s2_r14_op_mod 35 +TOP.toplevel.main;s2_r13_op_mod 35 +TOP.toplevel.main;s2_r12_op_mod 35 +TOP.toplevel.main;s2_r11_op_mod 35 +TOP.toplevel.main;s2_r10_op_mod 35 +TOP.toplevel.main;s3_r31_op_mod 35 +TOP.toplevel.main;s3_r30_op_mod 35 +TOP.toplevel.main;s3_r29_op_mod 35 +TOP.toplevel.main;s3_r28_op_mod 35 +TOP.toplevel.main;s3_r27_op_mod 35 +TOP.toplevel.main;s3_r26_op_mod 35 +TOP.toplevel.main;s3_r25_op_mod 35 +TOP.toplevel.main;s3_r24_op_mod 35 +TOP.toplevel.main;s3_r23_op_mod 35 +TOP.toplevel.main;s3_r22_op_mod 35 +TOP.toplevel.main;s3_r21_op_mod 35 +TOP.toplevel.main;s3_r20_op_mod 35 +TOP.toplevel.main;s3_r19_op_mod 35 +TOP.toplevel.main;s3_r18_op_mod 35 +TOP.toplevel.main;s3_r17_op_mod 35 +TOP.toplevel.main;s3_r16_op_mod 35 +TOP.toplevel.main;s3_r15_op_mod 35 +TOP.toplevel.main;s3_r14_op_mod 35 +TOP.toplevel.main;s3_r13_op_mod 35 +TOP.toplevel.main;s3_r12_op_mod 35 +TOP.toplevel.main;s3_r11_op_mod 35 +TOP.toplevel.main;s3_r10_op_mod 35 +TOP.toplevel.main;s4_r31_op_mod 35 +TOP.toplevel.main;s4_r30_op_mod 35 +TOP.toplevel.main;s4_r29_op_mod 35 +TOP.toplevel.main;s4_r28_op_mod 35 +TOP.toplevel.main;s4_r27_op_mod 35 +TOP.toplevel.main;s4_r26_op_mod 35 +TOP.toplevel.main;s4_r25_op_mod 35 +TOP.toplevel.main;s4_r24_op_mod 35 +TOP.toplevel.main;s4_r23_op_mod 35 +TOP.toplevel.main;s4_r22_op_mod 35 +TOP.toplevel.main;s4_r21_op_mod 35 +TOP.toplevel.main;s4_r20_op_mod 35 +TOP.toplevel.main;s4_r19_op_mod 35 +TOP.toplevel.main;s4_r18_op_mod 35 
+TOP.toplevel.main;s4_r17_op_mod 35 +TOP.toplevel.main;s4_r16_op_mod 35 +TOP.toplevel.main;s4_r15_op_mod 35 +TOP.toplevel.main;s4_r14_op_mod 35 +TOP.toplevel.main;s4_r13_op_mod 35 +TOP.toplevel.main;s4_r12_op_mod 35 +TOP.toplevel.main;s4_r11_op_mod 35 +TOP.toplevel.main;s4_r10_op_mod 35 diff --git a/tools/profiler/handmade-flame-graphs/ntt-32/new-flame.folded b/tools/profiler/handmade-flame-graphs/ntt-32/new-flame.folded new file mode 100644 index 000000000..c1971af9e --- /dev/null +++ b/tools/profiler/handmade-flame-graphs/ntt-32/new-flame.folded @@ -0,0 +1,337 @@ +TOP.toplevel.main 432 +TOP.toplevel.main;s0_mul;s0_mul9 2 +TOP.toplevel.main;s0_mul;s0_mul8 2 +TOP.toplevel.main;s0_mul;s0_mul7 2 +TOP.toplevel.main;s0_mul;s0_mul6 2 +TOP.toplevel.main;s0_mul;s0_mul5 2 +TOP.toplevel.main;s0_mul;s0_mul4 2 +TOP.toplevel.main;s0_mul;s0_mul3 2 +TOP.toplevel.main;s0_mul;s0_mul2 2 +TOP.toplevel.main;s0_mul;s0_mul1 2 +TOP.toplevel.main;s0_mul;s0_mul0 2 +TOP.toplevel.main;s1_mul;s1_mul9 2 +TOP.toplevel.main;s1_mul;s1_mul8 2 +TOP.toplevel.main;s1_mul;s1_mul7 2 +TOP.toplevel.main;s1_mul;s1_mul6 2 +TOP.toplevel.main;s1_mul;s1_mul5 2 +TOP.toplevel.main;s1_mul;s1_mul4 2 +TOP.toplevel.main;s1_mul;s1_mul3 2 +TOP.toplevel.main;s1_mul;s1_mul2 2 +TOP.toplevel.main;s1_mul;s1_mul1 2 +TOP.toplevel.main;s1_mul;s1_mul0 2 +TOP.toplevel.main;s2_mul;s2_mul9 2 +TOP.toplevel.main;s2_mul;s2_mul8 2 +TOP.toplevel.main;s2_mul;s2_mul7 2 +TOP.toplevel.main;s2_mul;s2_mul6 2 +TOP.toplevel.main;s2_mul;s2_mul5 2 +TOP.toplevel.main;s2_mul;s2_mul4 2 +TOP.toplevel.main;s2_mul;s2_mul3 2 +TOP.toplevel.main;s2_mul;s2_mul2 2 +TOP.toplevel.main;s2_mul;s2_mul1 2 +TOP.toplevel.main;s2_mul;s2_mul0 2 +TOP.toplevel.main;s3_mul;s3_mul9 2 +TOP.toplevel.main;s3_mul;s3_mul8 2 +TOP.toplevel.main;s3_mul;s3_mul7 2 +TOP.toplevel.main;s3_mul;s3_mul6 2 +TOP.toplevel.main;s3_mul;s3_mul5 2 +TOP.toplevel.main;s3_mul;s3_mul4 2 +TOP.toplevel.main;s3_mul;s3_mul3 2 +TOP.toplevel.main;s3_mul;s3_mul2 2 +TOP.toplevel.main;s3_mul;s3_mul1 2 
+TOP.toplevel.main;s3_mul;s3_mul0 2 +TOP.toplevel.main;s4_mul;s4_mul9 2 +TOP.toplevel.main;s4_mul;s4_mul8 2 +TOP.toplevel.main;s4_mul;s4_mul7 2 +TOP.toplevel.main;s4_mul;s4_mul6 2 +TOP.toplevel.main;s4_mul;s4_mul5 2 +TOP.toplevel.main;s4_mul;s4_mul4 2 +TOP.toplevel.main;s4_mul;s4_mul3 2 +TOP.toplevel.main;s4_mul;s4_mul2 2 +TOP.toplevel.main;s4_mul;s4_mul1 2 +TOP.toplevel.main;s4_mul;s4_mul0 2 +TOP.toplevel.main;s0_mul;s0_mul15 2 +TOP.toplevel.main;s0_mul;s0_mul14 2 +TOP.toplevel.main;s0_mul;s0_mul13 2 +TOP.toplevel.main;s0_mul;s0_mul12 2 +TOP.toplevel.main;s0_mul;s0_mul11 2 +TOP.toplevel.main;s0_mul;s0_mul10 2 +TOP.toplevel.main;s1_mul;s1_mul15 2 +TOP.toplevel.main;s1_mul;s1_mul14 2 +TOP.toplevel.main;s1_mul;s1_mul13 2 +TOP.toplevel.main;s1_mul;s1_mul12 2 +TOP.toplevel.main;s1_mul;s1_mul11 2 +TOP.toplevel.main;s1_mul;s1_mul10 2 +TOP.toplevel.main;s2_mul;s2_mul15 2 +TOP.toplevel.main;s2_mul;s2_mul14 2 +TOP.toplevel.main;s2_mul;s2_mul13 2 +TOP.toplevel.main;s2_mul;s2_mul12 2 +TOP.toplevel.main;s2_mul;s2_mul11 2 +TOP.toplevel.main;s2_mul;s2_mul10 2 +TOP.toplevel.main;s3_mul;s3_mul15 2 +TOP.toplevel.main;s3_mul;s3_mul14 2 +TOP.toplevel.main;s3_mul;s3_mul13 2 +TOP.toplevel.main;s3_mul;s3_mul12 2 +TOP.toplevel.main;s3_mul;s3_mul11 2 +TOP.toplevel.main;s3_mul;s3_mul10 2 +TOP.toplevel.main;s4_mul;s4_mul15 2 +TOP.toplevel.main;s4_mul;s4_mul14 2 +TOP.toplevel.main;s4_mul;s4_mul13 2 +TOP.toplevel.main;s4_mul;s4_mul12 2 +TOP.toplevel.main;s4_mul;s4_mul11 2 +TOP.toplevel.main;s4_mul;s4_mul10 2 +TOP.toplevel.main;preamble;preamble_0 1 +TOP.toplevel.main;preamble;preamble_1 1 +TOP.toplevel.main;preamble;preamble_2 1 +TOP.toplevel.main;preamble;preamble_3 1 +TOP.toplevel.main;preamble;preamble_4 1 +TOP.toplevel.main;preamble;preamble_5 1 +TOP.toplevel.main;preamble;preamble_6 1 +TOP.toplevel.main;preamble;preamble_7 1 +TOP.toplevel.main;preamble;preamble_8 1 +TOP.toplevel.main;preamble;preamble_9 1 +TOP.toplevel.main;epilogue;epilogue_0 1 +TOP.toplevel.main;epilogue;epilogue_1 1 
+TOP.toplevel.main;epilogue;epilogue_2 1 +TOP.toplevel.main;epilogue;epilogue_3 1 +TOP.toplevel.main;epilogue;epilogue_4 1 +TOP.toplevel.main;epilogue;epilogue_5 1 +TOP.toplevel.main;epilogue;epilogue_6 1 +TOP.toplevel.main;epilogue;epilogue_7 1 +TOP.toplevel.main;epilogue;epilogue_8 1 +TOP.toplevel.main;epilogue;epilogue_9 1 +TOP.toplevel.main;preamble;preamble_10 1 +TOP.toplevel.main;preamble;preamble_11 1 +TOP.toplevel.main;preamble;preamble_12 1 +TOP.toplevel.main;preamble;preamble_13 1 +TOP.toplevel.main;preamble;preamble_14 1 +TOP.toplevel.main;preamble;preamble_15 1 +TOP.toplevel.main;preamble;preamble_16 1 +TOP.toplevel.main;preamble;preamble_17 1 +TOP.toplevel.main;preamble;preamble_18 1 +TOP.toplevel.main;preamble;preamble_19 1 +TOP.toplevel.main;preamble;preamble_20 1 +TOP.toplevel.main;preamble;preamble_21 1 +TOP.toplevel.main;preamble;preamble_22 1 +TOP.toplevel.main;preamble;preamble_23 1 +TOP.toplevel.main;preamble;preamble_24 1 +TOP.toplevel.main;preamble;preamble_25 1 +TOP.toplevel.main;preamble;preamble_26 1 +TOP.toplevel.main;preamble;preamble_27 1 +TOP.toplevel.main;preamble;preamble_28 1 +TOP.toplevel.main;preamble;preamble_29 1 +TOP.toplevel.main;preamble;preamble_30 1 +TOP.toplevel.main;preamble;preamble_31 1 +TOP.toplevel.main;precursor;precursor_9 4 +TOP.toplevel.main;precursor;precursor_8 4 +TOP.toplevel.main;precursor;precursor_7 4 +TOP.toplevel.main;precursor;precursor_6 4 +TOP.toplevel.main;precursor;precursor_5 4 +TOP.toplevel.main;precursor;precursor_4 4 +TOP.toplevel.main;precursor;precursor_3 4 +TOP.toplevel.main;precursor;precursor_2 4 +TOP.toplevel.main;precursor;precursor_1 4 +TOP.toplevel.main;precursor;precursor_0 4 +TOP.toplevel.main;epilogue;epilogue_10 1 +TOP.toplevel.main;epilogue;epilogue_11 1 +TOP.toplevel.main;epilogue;epilogue_12 1 +TOP.toplevel.main;epilogue;epilogue_13 1 +TOP.toplevel.main;epilogue;epilogue_14 1 +TOP.toplevel.main;epilogue;epilogue_15 1 +TOP.toplevel.main;epilogue;epilogue_16 1 
+TOP.toplevel.main;epilogue;epilogue_17 1 +TOP.toplevel.main;epilogue;epilogue_18 1 +TOP.toplevel.main;epilogue;epilogue_19 1 +TOP.toplevel.main;epilogue;epilogue_20 1 +TOP.toplevel.main;epilogue;epilogue_21 1 +TOP.toplevel.main;epilogue;epilogue_22 1 +TOP.toplevel.main;epilogue;epilogue_23 1 +TOP.toplevel.main;epilogue;epilogue_24 1 +TOP.toplevel.main;epilogue;epilogue_25 1 +TOP.toplevel.main;epilogue;epilogue_26 1 +TOP.toplevel.main;epilogue;epilogue_27 1 +TOP.toplevel.main;epilogue;epilogue_28 1 +TOP.toplevel.main;epilogue;epilogue_29 1 +TOP.toplevel.main;epilogue;epilogue_30 1 +TOP.toplevel.main;epilogue;epilogue_31 1 +TOP.toplevel.main;s0_op_mod;s0_r9_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r8_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r7_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r6_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r5_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r4_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r3_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r2_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r1_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r0_op_mod 35 +TOP.toplevel.main;precursor;precursor_31 4 +TOP.toplevel.main;precursor;precursor_30 4 +TOP.toplevel.main;precursor;precursor_29 4 +TOP.toplevel.main;precursor;precursor_28 4 +TOP.toplevel.main;precursor;precursor_27 4 +TOP.toplevel.main;precursor;precursor_26 4 +TOP.toplevel.main;precursor;precursor_25 4 +TOP.toplevel.main;precursor;precursor_24 4 +TOP.toplevel.main;precursor;precursor_23 4 +TOP.toplevel.main;precursor;precursor_22 4 +TOP.toplevel.main;precursor;precursor_21 4 +TOP.toplevel.main;precursor;precursor_20 4 +TOP.toplevel.main;precursor;precursor_19 4 +TOP.toplevel.main;precursor;precursor_18 4 +TOP.toplevel.main;precursor;precursor_17 4 +TOP.toplevel.main;precursor;precursor_16 4 +TOP.toplevel.main;precursor;precursor_15 4 +TOP.toplevel.main;precursor;precursor_14 4 +TOP.toplevel.main;precursor;precursor_13 4 +TOP.toplevel.main;precursor;precursor_12 4 
+TOP.toplevel.main;precursor;precursor_11 4 +TOP.toplevel.main;precursor;precursor_10 4 +TOP.toplevel.main;s1_op_mod;s1_r9_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r8_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r7_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r6_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r5_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r4_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r3_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r2_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r1_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r0_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r9_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r8_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r7_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r6_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r5_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r4_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r3_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r2_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r1_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r0_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r9_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r8_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r7_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r6_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r5_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r4_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r3_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r2_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r1_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r0_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r9_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r8_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r7_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r6_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r5_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r4_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r3_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r2_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r1_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r0_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r31_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r30_op_mod 35 
+TOP.toplevel.main;s0_op_mod;s0_r29_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r28_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r27_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r26_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r25_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r24_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r23_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r22_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r21_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r20_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r19_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r18_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r17_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r16_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r15_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r14_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r13_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r12_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r11_op_mod 35 +TOP.toplevel.main;s0_op_mod;s0_r10_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r31_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r30_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r29_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r28_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r27_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r26_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r25_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r24_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r23_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r22_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r21_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r20_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r19_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r18_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r17_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r16_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r15_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r14_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r13_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r12_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r11_op_mod 35 +TOP.toplevel.main;s1_op_mod;s1_r10_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r31_op_mod 35 
+TOP.toplevel.main;s2_op_mod;s2_r30_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r29_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r28_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r27_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r26_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r25_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r24_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r23_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r22_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r21_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r20_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r19_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r18_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r17_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r16_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r15_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r14_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r13_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r12_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r11_op_mod 35 +TOP.toplevel.main;s2_op_mod;s2_r10_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r31_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r30_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r29_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r28_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r27_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r26_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r25_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r24_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r23_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r22_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r21_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r20_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r19_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r18_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r17_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r16_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r15_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r14_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r13_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r12_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r11_op_mod 35 +TOP.toplevel.main;s3_op_mod;s3_r10_op_mod 35 
+TOP.toplevel.main;s4_op_mod;s4_r31_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r30_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r29_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r28_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r27_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r26_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r25_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r24_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r23_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r22_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r21_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r20_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r19_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r18_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r17_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r16_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r15_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r14_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r13_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r12_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r11_op_mod 35 +TOP.toplevel.main;s4_op_mod;s4_r10_op_mod 35 diff --git a/tools/profiler/handmade-flame-graphs/ntt-32/original-flame.folded b/tools/profiler/handmade-flame-graphs/ntt-32/original-flame.folded new file mode 100644 index 000000000..df5e2e096 --- /dev/null +++ b/tools/profiler/handmade-flame-graphs/ntt-32/original-flame.folded @@ -0,0 +1,337 @@ +TOP.toplevel.main 432 +TOP.toplevel.main;s0_mul9 2 +TOP.toplevel.main;s0_mul8 2 +TOP.toplevel.main;s0_mul7 2 +TOP.toplevel.main;s0_mul6 2 +TOP.toplevel.main;s0_mul5 2 +TOP.toplevel.main;s0_mul4 2 +TOP.toplevel.main;s0_mul3 2 +TOP.toplevel.main;s0_mul2 2 +TOP.toplevel.main;s0_mul1 2 +TOP.toplevel.main;s0_mul0 2 +TOP.toplevel.main;s1_mul9 2 +TOP.toplevel.main;s1_mul8 2 +TOP.toplevel.main;s1_mul7 2 +TOP.toplevel.main;s1_mul6 2 +TOP.toplevel.main;s1_mul5 2 +TOP.toplevel.main;s1_mul4 2 +TOP.toplevel.main;s1_mul3 2 +TOP.toplevel.main;s1_mul2 2 +TOP.toplevel.main;s1_mul1 2 +TOP.toplevel.main;s1_mul0 2 +TOP.toplevel.main;s2_mul9 2 +TOP.toplevel.main;s2_mul8 2 +TOP.toplevel.main;s2_mul7 
2 +TOP.toplevel.main;s2_mul6 2 +TOP.toplevel.main;s2_mul5 2 +TOP.toplevel.main;s2_mul4 2 +TOP.toplevel.main;s2_mul3 2 +TOP.toplevel.main;s2_mul2 2 +TOP.toplevel.main;s2_mul1 2 +TOP.toplevel.main;s2_mul0 2 +TOP.toplevel.main;s3_mul9 2 +TOP.toplevel.main;s3_mul8 2 +TOP.toplevel.main;s3_mul7 2 +TOP.toplevel.main;s3_mul6 2 +TOP.toplevel.main;s3_mul5 2 +TOP.toplevel.main;s3_mul4 2 +TOP.toplevel.main;s3_mul3 2 +TOP.toplevel.main;s3_mul2 2 +TOP.toplevel.main;s3_mul1 2 +TOP.toplevel.main;s3_mul0 2 +TOP.toplevel.main;s4_mul9 2 +TOP.toplevel.main;s4_mul8 2 +TOP.toplevel.main;s4_mul7 2 +TOP.toplevel.main;s4_mul6 2 +TOP.toplevel.main;s4_mul5 2 +TOP.toplevel.main;s4_mul4 2 +TOP.toplevel.main;s4_mul3 2 +TOP.toplevel.main;s4_mul2 2 +TOP.toplevel.main;s4_mul1 2 +TOP.toplevel.main;s4_mul0 2 +TOP.toplevel.main;s0_mul15 2 +TOP.toplevel.main;s0_mul14 2 +TOP.toplevel.main;s0_mul13 2 +TOP.toplevel.main;s0_mul12 2 +TOP.toplevel.main;s0_mul11 2 +TOP.toplevel.main;s0_mul10 2 +TOP.toplevel.main;s1_mul15 2 +TOP.toplevel.main;s1_mul14 2 +TOP.toplevel.main;s1_mul13 2 +TOP.toplevel.main;s1_mul12 2 +TOP.toplevel.main;s1_mul11 2 +TOP.toplevel.main;s1_mul10 2 +TOP.toplevel.main;s2_mul15 2 +TOP.toplevel.main;s2_mul14 2 +TOP.toplevel.main;s2_mul13 2 +TOP.toplevel.main;s2_mul12 2 +TOP.toplevel.main;s2_mul11 2 +TOP.toplevel.main;s2_mul10 2 +TOP.toplevel.main;s3_mul15 2 +TOP.toplevel.main;s3_mul14 2 +TOP.toplevel.main;s3_mul13 2 +TOP.toplevel.main;s3_mul12 2 +TOP.toplevel.main;s3_mul11 2 +TOP.toplevel.main;s3_mul10 2 +TOP.toplevel.main;s4_mul15 2 +TOP.toplevel.main;s4_mul14 2 +TOP.toplevel.main;s4_mul13 2 +TOP.toplevel.main;s4_mul12 2 +TOP.toplevel.main;s4_mul11 2 +TOP.toplevel.main;s4_mul10 2 +TOP.toplevel.main;preamble_0 1 +TOP.toplevel.main;preamble_1 1 +TOP.toplevel.main;preamble_2 1 +TOP.toplevel.main;preamble_3 1 +TOP.toplevel.main;preamble_4 1 +TOP.toplevel.main;preamble_5 1 +TOP.toplevel.main;preamble_6 1 +TOP.toplevel.main;preamble_7 1 +TOP.toplevel.main;preamble_8 1 
+TOP.toplevel.main;preamble_9 1 +TOP.toplevel.main;epilogue_0 1 +TOP.toplevel.main;epilogue_1 1 +TOP.toplevel.main;epilogue_2 1 +TOP.toplevel.main;epilogue_3 1 +TOP.toplevel.main;epilogue_4 1 +TOP.toplevel.main;epilogue_5 1 +TOP.toplevel.main;epilogue_6 1 +TOP.toplevel.main;epilogue_7 1 +TOP.toplevel.main;epilogue_8 1 +TOP.toplevel.main;epilogue_9 1 +TOP.toplevel.main;preamble_10 1 +TOP.toplevel.main;preamble_11 1 +TOP.toplevel.main;preamble_12 1 +TOP.toplevel.main;preamble_13 1 +TOP.toplevel.main;preamble_14 1 +TOP.toplevel.main;preamble_15 1 +TOP.toplevel.main;preamble_16 1 +TOP.toplevel.main;preamble_17 1 +TOP.toplevel.main;preamble_18 1 +TOP.toplevel.main;preamble_19 1 +TOP.toplevel.main;preamble_20 1 +TOP.toplevel.main;preamble_21 1 +TOP.toplevel.main;preamble_22 1 +TOP.toplevel.main;preamble_23 1 +TOP.toplevel.main;preamble_24 1 +TOP.toplevel.main;preamble_25 1 +TOP.toplevel.main;preamble_26 1 +TOP.toplevel.main;preamble_27 1 +TOP.toplevel.main;preamble_28 1 +TOP.toplevel.main;preamble_29 1 +TOP.toplevel.main;preamble_30 1 +TOP.toplevel.main;preamble_31 1 +TOP.toplevel.main;precursor_9 4 +TOP.toplevel.main;precursor_8 4 +TOP.toplevel.main;precursor_7 4 +TOP.toplevel.main;precursor_6 4 +TOP.toplevel.main;precursor_5 4 +TOP.toplevel.main;precursor_4 4 +TOP.toplevel.main;precursor_3 4 +TOP.toplevel.main;precursor_2 4 +TOP.toplevel.main;precursor_1 4 +TOP.toplevel.main;precursor_0 4 +TOP.toplevel.main;epilogue_10 1 +TOP.toplevel.main;epilogue_11 1 +TOP.toplevel.main;epilogue_12 1 +TOP.toplevel.main;epilogue_13 1 +TOP.toplevel.main;epilogue_14 1 +TOP.toplevel.main;epilogue_15 1 +TOP.toplevel.main;epilogue_16 1 +TOP.toplevel.main;epilogue_17 1 +TOP.toplevel.main;epilogue_18 1 +TOP.toplevel.main;epilogue_19 1 +TOP.toplevel.main;epilogue_20 1 +TOP.toplevel.main;epilogue_21 1 +TOP.toplevel.main;epilogue_22 1 +TOP.toplevel.main;epilogue_23 1 +TOP.toplevel.main;epilogue_24 1 +TOP.toplevel.main;epilogue_25 1 +TOP.toplevel.main;epilogue_26 1 +TOP.toplevel.main;epilogue_27 
1 +TOP.toplevel.main;epilogue_28 1 +TOP.toplevel.main;epilogue_29 1 +TOP.toplevel.main;epilogue_30 1 +TOP.toplevel.main;epilogue_31 1 +TOP.toplevel.main;s0_r9_op_mod 35 +TOP.toplevel.main;s0_r8_op_mod 35 +TOP.toplevel.main;s0_r7_op_mod 35 +TOP.toplevel.main;s0_r6_op_mod 35 +TOP.toplevel.main;s0_r5_op_mod 35 +TOP.toplevel.main;s0_r4_op_mod 35 +TOP.toplevel.main;s0_r3_op_mod 35 +TOP.toplevel.main;s0_r2_op_mod 35 +TOP.toplevel.main;s0_r1_op_mod 35 +TOP.toplevel.main;s0_r0_op_mod 35 +TOP.toplevel.main;precursor_31 4 +TOP.toplevel.main;precursor_30 4 +TOP.toplevel.main;precursor_29 4 +TOP.toplevel.main;precursor_28 4 +TOP.toplevel.main;precursor_27 4 +TOP.toplevel.main;precursor_26 4 +TOP.toplevel.main;precursor_25 4 +TOP.toplevel.main;precursor_24 4 +TOP.toplevel.main;precursor_23 4 +TOP.toplevel.main;precursor_22 4 +TOP.toplevel.main;precursor_21 4 +TOP.toplevel.main;precursor_20 4 +TOP.toplevel.main;precursor_19 4 +TOP.toplevel.main;precursor_18 4 +TOP.toplevel.main;precursor_17 4 +TOP.toplevel.main;precursor_16 4 +TOP.toplevel.main;precursor_15 4 +TOP.toplevel.main;precursor_14 4 +TOP.toplevel.main;precursor_13 4 +TOP.toplevel.main;precursor_12 4 +TOP.toplevel.main;precursor_11 4 +TOP.toplevel.main;precursor_10 4 +TOP.toplevel.main;s1_r9_op_mod 35 +TOP.toplevel.main;s1_r8_op_mod 35 +TOP.toplevel.main;s1_r7_op_mod 35 +TOP.toplevel.main;s1_r6_op_mod 35 +TOP.toplevel.main;s1_r5_op_mod 35 +TOP.toplevel.main;s1_r4_op_mod 35 +TOP.toplevel.main;s1_r3_op_mod 35 +TOP.toplevel.main;s1_r2_op_mod 35 +TOP.toplevel.main;s1_r1_op_mod 35 +TOP.toplevel.main;s1_r0_op_mod 35 +TOP.toplevel.main;s2_r9_op_mod 35 +TOP.toplevel.main;s2_r8_op_mod 35 +TOP.toplevel.main;s2_r7_op_mod 35 +TOP.toplevel.main;s2_r6_op_mod 35 +TOP.toplevel.main;s2_r5_op_mod 35 +TOP.toplevel.main;s2_r4_op_mod 35 +TOP.toplevel.main;s2_r3_op_mod 35 +TOP.toplevel.main;s2_r2_op_mod 35 +TOP.toplevel.main;s2_r1_op_mod 35 +TOP.toplevel.main;s2_r0_op_mod 35 +TOP.toplevel.main;s3_r9_op_mod 35 +TOP.toplevel.main;s3_r8_op_mod 
35 +TOP.toplevel.main;s3_r7_op_mod 35 +TOP.toplevel.main;s3_r6_op_mod 35 +TOP.toplevel.main;s3_r5_op_mod 35 +TOP.toplevel.main;s3_r4_op_mod 35 +TOP.toplevel.main;s3_r3_op_mod 35 +TOP.toplevel.main;s3_r2_op_mod 35 +TOP.toplevel.main;s3_r1_op_mod 35 +TOP.toplevel.main;s3_r0_op_mod 35 +TOP.toplevel.main;s4_r9_op_mod 35 +TOP.toplevel.main;s4_r8_op_mod 35 +TOP.toplevel.main;s4_r7_op_mod 35 +TOP.toplevel.main;s4_r6_op_mod 35 +TOP.toplevel.main;s4_r5_op_mod 35 +TOP.toplevel.main;s4_r4_op_mod 35 +TOP.toplevel.main;s4_r3_op_mod 35 +TOP.toplevel.main;s4_r2_op_mod 35 +TOP.toplevel.main;s4_r1_op_mod 35 +TOP.toplevel.main;s4_r0_op_mod 35 +TOP.toplevel.main;s0_r31_op_mod 35 +TOP.toplevel.main;s0_r30_op_mod 35 +TOP.toplevel.main;s0_r29_op_mod 35 +TOP.toplevel.main;s0_r28_op_mod 35 +TOP.toplevel.main;s0_r27_op_mod 35 +TOP.toplevel.main;s0_r26_op_mod 35 +TOP.toplevel.main;s0_r25_op_mod 35 +TOP.toplevel.main;s0_r24_op_mod 35 +TOP.toplevel.main;s0_r23_op_mod 35 +TOP.toplevel.main;s0_r22_op_mod 35 +TOP.toplevel.main;s0_r21_op_mod 35 +TOP.toplevel.main;s0_r20_op_mod 35 +TOP.toplevel.main;s0_r19_op_mod 35 +TOP.toplevel.main;s0_r18_op_mod 35 +TOP.toplevel.main;s0_r17_op_mod 35 +TOP.toplevel.main;s0_r16_op_mod 35 +TOP.toplevel.main;s0_r15_op_mod 35 +TOP.toplevel.main;s0_r14_op_mod 35 +TOP.toplevel.main;s0_r13_op_mod 35 +TOP.toplevel.main;s0_r12_op_mod 35 +TOP.toplevel.main;s0_r11_op_mod 35 +TOP.toplevel.main;s0_r10_op_mod 35 +TOP.toplevel.main;s1_r31_op_mod 35 +TOP.toplevel.main;s1_r30_op_mod 35 +TOP.toplevel.main;s1_r29_op_mod 35 +TOP.toplevel.main;s1_r28_op_mod 35 +TOP.toplevel.main;s1_r27_op_mod 35 +TOP.toplevel.main;s1_r26_op_mod 35 +TOP.toplevel.main;s1_r25_op_mod 35 +TOP.toplevel.main;s1_r24_op_mod 35 +TOP.toplevel.main;s1_r23_op_mod 35 +TOP.toplevel.main;s1_r22_op_mod 35 +TOP.toplevel.main;s1_r21_op_mod 35 +TOP.toplevel.main;s1_r20_op_mod 35 +TOP.toplevel.main;s1_r19_op_mod 35 +TOP.toplevel.main;s1_r18_op_mod 35 +TOP.toplevel.main;s1_r17_op_mod 35 +TOP.toplevel.main;s1_r16_op_mod 
35 +TOP.toplevel.main;s1_r15_op_mod 35 +TOP.toplevel.main;s1_r14_op_mod 35 +TOP.toplevel.main;s1_r13_op_mod 35 +TOP.toplevel.main;s1_r12_op_mod 35 +TOP.toplevel.main;s1_r11_op_mod 35 +TOP.toplevel.main;s1_r10_op_mod 35 +TOP.toplevel.main;s2_r31_op_mod 35 +TOP.toplevel.main;s2_r30_op_mod 35 +TOP.toplevel.main;s2_r29_op_mod 35 +TOP.toplevel.main;s2_r28_op_mod 35 +TOP.toplevel.main;s2_r27_op_mod 35 +TOP.toplevel.main;s2_r26_op_mod 35 +TOP.toplevel.main;s2_r25_op_mod 35 +TOP.toplevel.main;s2_r24_op_mod 35 +TOP.toplevel.main;s2_r23_op_mod 35 +TOP.toplevel.main;s2_r22_op_mod 35 +TOP.toplevel.main;s2_r21_op_mod 35 +TOP.toplevel.main;s2_r20_op_mod 35 +TOP.toplevel.main;s2_r19_op_mod 35 +TOP.toplevel.main;s2_r18_op_mod 35 +TOP.toplevel.main;s2_r17_op_mod 35 +TOP.toplevel.main;s2_r16_op_mod 35 +TOP.toplevel.main;s2_r15_op_mod 35 +TOP.toplevel.main;s2_r14_op_mod 35 +TOP.toplevel.main;s2_r13_op_mod 35 +TOP.toplevel.main;s2_r12_op_mod 35 +TOP.toplevel.main;s2_r11_op_mod 35 +TOP.toplevel.main;s2_r10_op_mod 35 +TOP.toplevel.main;s3_r31_op_mod 35 +TOP.toplevel.main;s3_r30_op_mod 35 +TOP.toplevel.main;s3_r29_op_mod 35 +TOP.toplevel.main;s3_r28_op_mod 35 +TOP.toplevel.main;s3_r27_op_mod 35 +TOP.toplevel.main;s3_r26_op_mod 35 +TOP.toplevel.main;s3_r25_op_mod 35 +TOP.toplevel.main;s3_r24_op_mod 35 +TOP.toplevel.main;s3_r23_op_mod 35 +TOP.toplevel.main;s3_r22_op_mod 35 +TOP.toplevel.main;s3_r21_op_mod 35 +TOP.toplevel.main;s3_r20_op_mod 35 +TOP.toplevel.main;s3_r19_op_mod 35 +TOP.toplevel.main;s3_r18_op_mod 35 +TOP.toplevel.main;s3_r17_op_mod 35 +TOP.toplevel.main;s3_r16_op_mod 35 +TOP.toplevel.main;s3_r15_op_mod 35 +TOP.toplevel.main;s3_r14_op_mod 35 +TOP.toplevel.main;s3_r13_op_mod 35 +TOP.toplevel.main;s3_r12_op_mod 35 +TOP.toplevel.main;s3_r11_op_mod 35 +TOP.toplevel.main;s3_r10_op_mod 35 +TOP.toplevel.main;s4_r31_op_mod 35 +TOP.toplevel.main;s4_r30_op_mod 35 +TOP.toplevel.main;s4_r29_op_mod 35 +TOP.toplevel.main;s4_r28_op_mod 35 +TOP.toplevel.main;s4_r27_op_mod 35 
+TOP.toplevel.main;s4_r26_op_mod 35 +TOP.toplevel.main;s4_r25_op_mod 35 +TOP.toplevel.main;s4_r24_op_mod 35 +TOP.toplevel.main;s4_r23_op_mod 35 +TOP.toplevel.main;s4_r22_op_mod 35 +TOP.toplevel.main;s4_r21_op_mod 35 +TOP.toplevel.main;s4_r20_op_mod 35 +TOP.toplevel.main;s4_r19_op_mod 35 +TOP.toplevel.main;s4_r18_op_mod 35 +TOP.toplevel.main;s4_r17_op_mod 35 +TOP.toplevel.main;s4_r16_op_mod 35 +TOP.toplevel.main;s4_r15_op_mod 35 +TOP.toplevel.main;s4_r14_op_mod 35 +TOP.toplevel.main;s4_r13_op_mod 35 +TOP.toplevel.main;s4_r12_op_mod 35 +TOP.toplevel.main;s4_r11_op_mod 35 +TOP.toplevel.main;s4_r10_op_mod 35 diff --git a/tools/profiler/parse-vcd.py b/tools/profiler/parse-vcd.py index 12f3f7ec7..9dc487a6c 100644 --- a/tools/profiler/parse-vcd.py +++ b/tools/profiler/parse-vcd.py @@ -8,7 +8,7 @@ def remove_size_from_name(name: str) -> str: return name.split('[')[0] class ProfilingInfo: - def __init__(self, name, fsm_name=None, fsm_values=None, tdcc_group_name=None): + def __init__(self, name, component, fsm_name=None, fsm_values=None, tdcc_group_name=None): self.name = name self.fsm_name = fsm_name self.fsm_values = fsm_values @@ -16,6 +16,7 @@ def __init__(self, name, fsm_name=None, fsm_values=None, tdcc_group_name=None): self.closed_segments = [] # Segments will be (start_time, end_time) self.current_segment = None self.tdcc_group = tdcc_group_name + self.component = component def __repr__ (self): segments_str = "" @@ -23,13 +24,15 @@ def __repr__ (self): if (segments_str != ""): segments_str += ", " segments_str += f"[{segment['start']}, {segment['end']})" - if self.fsm_name is None: - header = f"[GT] Group {self.name}:\n" - else: + if self.fsm_name is not None: header = (f"[FSM] Group {self.name}:\n" + f"\tFSM name: {self.fsm_name}\n" + f"\tFSM state ids: {self.fsm_values}\n" ) + elif self.component is None: + header = f"[CMP] Group {self.name}:\n" + else: + header = f"[GT] Group {self.name}:\n" return (header + f"\tTotal cycles: {self.total_cycles}\n" + @@ -56,6 
+59,8 @@ def emit_csv_data(self): name = self.name if self.fsm_name is not None: name += "[FSM]" + if self.component is None: + name += "[CMP]" return {"name": name, "total-cycles" : self.total_cycles, "times-active" : len(self.closed_segments), @@ -89,11 +94,11 @@ def end_current_segment(self, curr_clock_cycle): class VCDConverter(vcdvcd.StreamParserCallbacks): - def __init__(self, fsms, single_enable_names, tdcc_groups, fsm_group_maps, main_component): + def __init__(self, fsms, single_enable_names, tdcc_groups, fsm_group_maps, main_component, cells): super().__init__() self.main_component = main_component self.fsms = fsms - self.single_enable_names = single_enable_names + self.single_enable_names = single_enable_names.keys() # Recording the first cycle when the TDCC group became active self.tdcc_group_active_cycle = {tdcc_group_name : -1 for tdcc_group_name in tdcc_groups} # Map from a TDCC group to all FSMs that depend on it. maybe a 1:1 mapping @@ -103,11 +108,14 @@ def __init__(self, fsms, single_enable_names, tdcc_groups, fsm_group_maps, main_ self.signal_to_curr_value = {fsm : -1 for fsm in fsms} for group in fsm_group_maps: # Differentiate FSM versions from ground truth versions - self.profiling_info[f"{group}FSM"] = ProfilingInfo(group, fsm_group_maps[group]["fsm"], fsm_group_maps[group]["ids"], fsm_group_maps[group]["tdcc-group-name"]) + self.profiling_info[f"{group}FSM"] = ProfilingInfo(group, fsm_group_maps[group]["component"], fsm_group_maps[group]["fsm"], fsm_group_maps[group]["ids"], fsm_group_maps[group]["tdcc-group-name"]) for single_enable_group in single_enable_names: - self.profiling_info[single_enable_group] = ProfilingInfo(single_enable_group) + self.profiling_info[single_enable_group] = ProfilingInfo(single_enable_group, single_enable_names[single_enable_group]) self.signal_to_curr_value[f"{single_enable_group}_go"] = -1 self.signal_to_curr_value[f"{single_enable_group}_done"] = -1 + self.cells = cells + for cell in cells: + 
self.profiling_info[cell] = ProfilingInfo(cell, None) # Map from timestamps [ns] to value change events that happened on that timestamp self.timestamps_to_events = {} @@ -117,8 +125,9 @@ def enddefinitions(self, vcd, signals, cur_sig_vals): refs = sorted(refs, key=lambda e: e[0]) names = [remove_size_from_name(e[0]) for e in refs] signal_id_dict = {sid : [] for sid in vcd.references_to_ids.values()} # one id can map to multiple signal names since wires are connected - main_go_name = f"{self.main_component}.go" - signal_id_dict[vcd.references_to_ids[main_go_name]] = [main_go_name] + + # main_go_name = f"{self.main_component}.go" + # signal_id_dict[vcd.references_to_ids[main_go_name]] = [main_go_name] clock_name = f"{self.main_component}.clk" if clock_name not in names: @@ -126,6 +135,13 @@ def enddefinitions(self, vcd, signals, cur_sig_vals): sys.exit(1) signal_id_dict[vcd.references_to_ids[clock_name]] = [clock_name] + # get go and done for cells (the signals are exactly {cell}.go and {cell}.done) + for cell in self.cells: + cell_go = cell + ".go" + cell_done = cell + ".done" + signal_id_dict[vcd.references_to_ids[cell_go]].append(cell_go) + signal_id_dict[vcd.references_to_ids[cell_done]].append(cell_done) + for name, sid in refs: # FIXME: We may want to optimize these nested for loops for tdcc_group in self.tdcc_group_to_dep_fsms: @@ -199,7 +215,7 @@ def postprocess(self): # checking whether the timestamp has a rising edge (hacky) if {"signal": clock_name, "value": 1} in events: clock_cycles += 1 - # TDCC groups need to be recorded for tracking FSM values + # TDCC groups need to be recorded (before FSMs) for tracking FSM values # (ex. 
if the FSM has value 0 but the TDCC group isn't active, then the group represented by the # FSM's 0 value should not be considered as active) for tdcc_event in filter(lambda e : "tdcc" in e["signal"] and "go" in e["signal"], events): @@ -209,6 +225,8 @@ def postprocess(self): for fsm in self.tdcc_group_to_dep_fsms[tdcc_group]: value = fsm_to_curr_value[fsm] if value != -1: + if value not in self.fsms[fsm]: + continue next_group = f"{self.fsms[fsm][value]}FSM" fsm_to_active_group[fsm] = next_group self.profiling_info[next_group].start_new_segment(clock_cycles) @@ -219,6 +237,12 @@ def postprocess(self): value = event["value"] if "tdcc" in signal_name and "go" in signal_name: # skip all tdcc events since we've already processed them continue + if signal_name.endswith(".go") and value == 1: # cells have .go and .done + cell = signal_name.split(".go")[0] + self.profiling_info[cell].start_new_segment(clock_cycles) + if signal_name.endswith(".done") and value == 1: # cells have .go and .done + cell = signal_name.split(".done")[0] + self.profiling_info[cell].end_current_segment(clock_cycles) if "_go" in signal_name and value == 1: group = "_".join(signal_name.split("_")[0:-1]) self.profiling_info[group].start_new_segment(clock_cycles) @@ -276,7 +300,8 @@ def read_component_cell_names_json(json_file): # Reads json generated by TDCC (via dump-fsm-json option) to produce initial group information def remap_tdcc_json(json_file, components_to_cells): profiling_infos = json.load(open(json_file)) - group_names = set() # all groups (to record ground truth) + group_names = {} # all groups (to record ground truth). Maps to the group's component (needed for stacks) + cell_names = set() # go and done info are needed for cells tdcc_groups = {} # TDCC-generated groups that manage control flow using FSMs. 
maps to all fsms that map to the tdcc group fsm_group_maps = {} # fsm-managed groups info (fsm register, TDCC group that manages fsm, id of group within fsm) fsms = {} # Remapping of JSON data for easy access @@ -292,18 +317,22 @@ def remap_tdcc_json(json_file, components_to_cells): fsms[fsm_name][state["id"]] = group_name tdcc_group = cell + "." + fsm["group"] if group_name not in fsm_group_maps: - fsm_group_maps[group_name] = {"fsm": fsm_name, "tdcc-group-name": tdcc_group, "ids": [state["id"]]} + fsm_group_maps[group_name] = {"fsm": fsm_name, "tdcc-group-name": tdcc_group, "ids": [state["id"]], "component": fsm["component"]} if tdcc_group not in tdcc_groups: # Keep track of the TDCC group to figure out when first group starts tdcc_groups[tdcc_group] = set() tdcc_groups[tdcc_group].add(fsm_name) - group_names.add(group_name) - else: + group_names[group_name] = fsm["component"] + else: fsm_group_maps[group_name]["ids"].append(state["id"]) else: - for cell in components_to_cells[profiling_info["SingleEnable"]["component"]]: # get all possibilities of cells - group_names.add(cell + "." + profiling_info["SingleEnable"]["group"]) + component = profiling_info["SingleEnable"]["component"] + for cell in components_to_cells[component]: # get all possibilities of cells + group_names[cell + "." 
+ profiling_info["SingleEnable"]["group"]] = component + for component in components_to_cells: + for cell in components_to_cells[component]: + cell_names.add(cell) - return fsms, group_names, tdcc_groups, fsm_group_maps + return fsms, group_names, tdcc_groups, fsm_group_maps, cell_names def output_result(out_csv, dump_out_json, converter): print(f"Total clock cycles: {converter.clock_cycles}") @@ -322,6 +351,8 @@ def output_result(out_csv, dump_out_json, converter): print() for group_info in groups_to_emit: print(group_info) + # Add total cycles for visualizer script (probably want to do this in a neater fashion in the future) + dump_json_acc.append({"name": "TOTAL", "total_cycles": converter.clock_cycles, "main_full_path": converter.main_component}) # emit a json for visualizer script print(f"Writing dump JSON to {dump_out_json}") with open(dump_out_json, "w", encoding="utf-8") as dump_file: @@ -339,8 +370,8 @@ def output_result(out_csv, dump_out_json, converter): def main(vcd_filename, groups_json_file, cells_json_file, out_csv, dump_out_json): main_component, components_to_cells = read_component_cell_names_json(cells_json_file) - fsms, group_names, tdcc_group_names, fsm_group_maps = remap_tdcc_json(groups_json_file, components_to_cells) - converter = VCDConverter(fsms, group_names, tdcc_group_names, fsm_group_maps, main_component) + fsms, group_names, tdcc_group_names, fsm_group_maps, cells = remap_tdcc_json(groups_json_file, components_to_cells) + converter = VCDConverter(fsms, group_names, tdcc_group_names, fsm_group_maps, main_component, cells) vcdvcd.VCDVCD(vcd_filename, callbacks=converter, store_tvs=False) converter.postprocess() output_result(out_csv, dump_out_json, converter) diff --git a/tools/profiler/run-up-to-tdcc.sh b/tools/profiler/run-up-to-tdcc.sh index 7c77fae73..2ae2f7055 100644 --- a/tools/profiler/run-up-to-tdcc.sh +++ b/tools/profiler/run-up-to-tdcc.sh @@ -12,13 +12,13 @@ CALYX_DIR=$( dirname $( dirname ${SCRIPT_DIR} ) ) if [ "$2" == "-o" ]; 
then ( cd ${CALYX_DIR} - cargo run $1 -p well-formed -p papercut -p canonicalize -p infer-data-path -p collapse-control -p compile-sync-without-sync-reg -p group2seq -p dead-assign-removal -p group2invoke -p infer-share -p inline -p comb-prop -p dead-cell-removal -p cell-share -p simplify-with-control -p compile-invoke -p static-inference -p static-promotion -p compile-repeat -p dead-group-removal -p collapse-control -p static-inline -p merge-assigns -p dead-group-removal -p simplify-static-guards -p add-guard -p static-fsm-opts -p compile-static -p dead-group-removal -p tdcc + cargo run $1 -p compile-repeat -p well-formed -p papercut -p canonicalize -p infer-data-path -p collapse-control -p compile-sync-without-sync-reg -p group2seq -p dead-assign-removal -p group2invoke -p infer-share -p inline -p comb-prop -p dead-cell-removal -p cell-share -p simplify-with-control -p compile-invoke -p static-inference -p static-promotion -p compile-repeat -p dead-group-removal -p collapse-control -p static-inline -p merge-assigns -p dead-group-removal -p simplify-static-guards -p add-guard -p static-fsm-opts -p compile-static -p dead-group-removal -p tdcc ) else ( cd ${CALYX_DIR} - cargo run $1 -p well-formed -p papercut -p canonicalize -p compile-sync -p simplify-with-control -p compile-invoke -p static-inline -p merge-assigns -p dead-group-removal -p simplify-static-guards -p add-guard -p static-fsm-opts -p compile-static -p dead-group-removal -p tdcc + cargo run $1 -p compile-repeat -p well-formed -p papercut -p canonicalize -p compile-sync -p simplify-with-control -p compile-invoke -p static-inline -p merge-assigns -p dead-group-removal -p simplify-static-guards -p add-guard -p static-fsm-opts -p compile-static -p dead-group-removal -p tdcc ) fi