diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f067922..e5554f13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,7 +45,7 @@ add_subdirectory(ramulator) add_subdirectory(pin/pin_lib) add_subdirectory(pin/pin_exec/testing) -set(scarab_dirs bp debug bp/template_lib dvfs frontend globals isa libs memory power prefetcher confidence .) +set(scarab_dirs bp debug bp/template_lib dvfs frontend frontend/synthetic globals isa libs memory power prefetcher confidence .) if(DEFINED ENV{SCARAB_ENABLE_PT_MEMTRACE}) set(scarab_dirs ${scarab_dirs} frontend/pt_memtrace) endif() diff --git a/src/debug/debug.param.def b/src/debug/debug.param.def index 0041a7df..c979a2c8 100644 --- a/src/debug/debug.param.def +++ b/src/debug/debug.param.def @@ -129,3 +129,5 @@ DEF_PARAM( debug_fdip , DEBUG_FDIP DEF_PARAM( debug_eip , DEBUG_EIP , Flag , Flag , FALSE , ) DEF_PARAM( debug_djolt , DEBUG_DJOLT , Flag , Flag , FALSE , ) DEF_PARAM( debug_fnlmma , DEBUG_FNLMMA , Flag , Flag , FALSE , ) + +DEF_PARAM( debug_synth , DEBUG_SYNTHETIC_INST , Flag , Flag , FALSE , ) diff --git a/src/frontend/frontend.c b/src/frontend/frontend.c index 3e64c70a..9883850b 100644 --- a/src/frontend/frontend.c +++ b/src/frontend/frontend.c @@ -36,6 +36,7 @@ #include "general.param.h" #include "bp/bp.h" +#include "frontend/synthetic/synth_fe.h" #include "frontend_intf.h" #include "icache_stage.h" @@ -72,6 +73,10 @@ void frontend_init() { trace_init(); break; } + case FE_SYNTHETIC: { + synth_init(); + break; + } #ifdef ENABLE_PT_MEMTRACE case FE_PT: case FE_MEMTRACE: { @@ -95,6 +100,10 @@ void frontend_done(Flag* retired_exit) { trace_done(); break; } + case FE_SYNTHETIC: { + synth_done(); + break; + } #ifdef ENABLE_PT_MEMTRACE case FE_PT: case FE_MEMTRACE: { @@ -178,4 +187,4 @@ void frontend_extract_basic_block_vectors() { } } #endif -/*************************************************************/ +/*************************************************************/ \ No newline at end of file diff 
--git a/src/frontend/frontend_intf.c b/src/frontend/frontend_intf.c index 18b89a04..7b074397 100644 --- a/src/frontend/frontend_intf.c +++ b/src/frontend/frontend_intf.c @@ -35,6 +35,7 @@ /* Include headers of all the implementations here */ #include "frontend/pin_exec_driven_fe.h" #include "frontend/pin_trace_fe.h" +#include "frontend/synthetic/synth_fe.h" #ifdef ENABLE_PT_MEMTRACE #include "frontend/pt_memtrace/trace_fe.h" diff --git a/src/frontend/frontend_table.def b/src/frontend/frontend_table.def index 66553289..2e271282 100644 --- a/src/frontend/frontend_table.def +++ b/src/frontend/frontend_table.def @@ -29,6 +29,7 @@ // Format: enum name, text name, function name prefix FRONTEND_IMPL(PIN_EXEC_DRIVEN, "pin_exec_driven", pin_exec_driven) FRONTEND_IMPL(TRACE, "trace", trace) +FRONTEND_IMPL(SYNTHETIC, "synthetic", synth) #ifdef ENABLE_PT_MEMTRACE FRONTEND_IMPL(MEMTRACE, "memtrace", ext_trace) FRONTEND_IMPL(PT, "pt", ext_trace) diff --git a/src/frontend/synthetic/kernel_params.h b/src/frontend/synthetic/kernel_params.h new file mode 100644 index 00000000..1d33643e --- /dev/null +++ b/src/frontend/synthetic/kernel_params.h @@ -0,0 +1,10 @@ +/* Kernel Enum */ +typedef enum Kernel_Id_Enum { +#define KERNEL_IMPL(id, name) id, +#include "frontend/synthetic/kernel_table.def" +#undef KERNEL_IMPL + INVALID +} Kernel_Enum; + +extern Kernel_Enum kernel; +extern const char* kernel_names[]; \ No newline at end of file diff --git a/src/frontend/synthetic/kernel_table.def b/src/frontend/synthetic/kernel_table.def new file mode 100644 index 00000000..83cd87c7 --- /dev/null +++ b/src/frontend/synthetic/kernel_table.def @@ -0,0 +1,18 @@ +// Format: enum name, text name, function name prefix +KERNEL_IMPL(MEM_BANDWIDTH_LIMITED, "mem_bandwidth_limited") +KERNEL_IMPL(ICACHE_LIMITED, "icache_limited") +KERNEL_IMPL(DCACHE_LIMITED, "dcache_limited") +KERNEL_IMPL(MLC_LIMITED, "mlc_limited") +KERNEL_IMPL(LLC_LIMITED, "llc_limited") +KERNEL_IMPL(MEM_LIMITED, "mem_limited") 
+KERNEL_IMPL(ILP_LIMITED_1_DEP_CHAIN, "ilp_limited_1_dep_chain") +KERNEL_IMPL(ILP_LIMITED_2_DEP_CHAIN, "ilp_limited_2_dep_chain") +KERNEL_IMPL(ILP_LIMITED_4_DEP_CHAIN, "ilp_limited_4_dep_chain") +KERNEL_IMPL(CBR_LIMITED_20T, "cbr_limited_20t") +KERNEL_IMPL(CBR_LIMITED_50T, "cbr_limited_50t") +KERNEL_IMPL(CBR_LIMITED_80T, "cbr_limited_80t") +KERNEL_IMPL(BTB_LIMITED_FULL_CAPACITY_SWEEP, "btb_limited_full_capacity_sweep") +KERNEL_IMPL(BTB_LIMITED_FULL_ASSOC_SWEEP, "btb_limited_assoc_sweep") +KERNEL_IMPL(IBR_LIMITED_ROUNDROBIN_4TGTS, "ibr_limited_RR_4Tgts") +KERNEL_IMPL(IBR_LIMITED_RANDOM_4TGTS, "ibr_limited_random_4Tgts") +KERNEL_IMPL(IBR_LIMITED_Random_2TGTS, "ibr_limited_random_2Tgts") diff --git a/src/frontend/synthetic/run.sh b/src/frontend/synthetic/run.sh new file mode 100755 index 00000000..39634a37 --- /dev/null +++ b/src/frontend/synthetic/run.sh @@ -0,0 +1,181 @@ +#!/bin/bash + +if [ -z "${SCARAB_ROOT}" ]; then + echo "set SCARAB_ROOT variable" + exit 1 +fi + +SCARAB_SRC="${SCARAB_ROOT}/src" +SCARAB="${SCARAB_SRC}/scarab" + +if [ ! 
-f "${SCARAB}" ]; then + echo "build scarab first before running this" + exit 1 +fi + +INST_LIMIT="10000000" +FRONTEND="synthetic" +kernels=( + "icache_limited" + "mem_bandwidth_limited_1FU" + "mem_bandwidth_limited_2FU" + "mem_bandwidth_limited_4FU" + "dcache_limited" + "mlc_limited" + "llc_limited" + "ilp_limited_1_dep_chain" + "ilp_limited_2_dep_chain" + "ilp_limited_4_dep_chain" + "cbr_limited_20t" + "cbr_limited_50t" + "cbr_limited_80t" + "btb_limited_full_capacity_sweep" + "btb_limited_assoc_sweep" + "ibr_limited_RR_4Tgts" + "ibr_limited_random_4Tgts" + "ibr_limited_random_2Tgts" + "mem_limited" + ) +OFF_FDIP="--fdip_enable 0" +UOP_CACHE_SCALE_UP="--uop_cache_lines 524288" +CBR_SCALE_BTB="--btb_entries 1048576 --btb_assoc 1024 --uop_cache_lines 1048576 --uop_cache_assoc 1024 --icache_size 524288 --icache_assoc 512" +UBR_SCALE_BTB="--btb_entries 128 --uop_cache_lines 1048576 --uop_cache_assoc 1024 --icache_size 524288 --icache_assoc 512" + +FUTYPES_1FU="b00100100111111100111001001111110010010011111110011100100111111 \ + b10010000110111100011111001110111001000011011110001111100111011 \ + b00000001000000001000000010000010000000100000000100000000000001 \ + b00000001000000001000000010000010000000100000000100000001000001 \ + b00000000110111100011011001110110000000011011110001101100111011 \ + b01001000000000000111001001111110100100000000000011100100111111 \ + b00000010000000010000000100000010000001000000001000000010000001 \ + b00000010000000010000000100000010000001000000001000000010000001" + +FUTYPES_2FU="b00100100111111100111001001111110010010011111110011100100111111 \ + b10010000110111100011111001110111001000011011110001111100111011 \ + b00000001000000001000000010000010000000100000000100000001000001 \ + b00000001000000001000000010000010000000100000000100000001000001 \ + b00000000110111100011011001110110000000011011110001101100111011 \ + b01001000000000000111001001111110100100000000000011100100111111 \ + 
b00000010000000010000000100000010000001000000001000000010000001 \ + b00000010000000010000000100000010000001000000001000000010000001" + +FUTYPES_4FU="b00100100111111100111001001111110010010011111110011100100111111 \ + b10010000110111100011111001110111001000011011110001111100111011 \ + b00000001000000001000000010000010000000100000000100000001000001 \ + b00000001000000001000000010000010000000100000000100000001000001 \ + b00000000110111100011011001110110000000011011110001101101111011 \ + b01001000000000000111001001111110100100000000000011100101111111 \ + b00000010000000010000000100000010000001000000001000000010000001 \ + b00000010000000010000000100000010000001000000001000000010000001" + + +for args in "${@}"; do + case ${args} in + --icache) + mkdir icache_limited + cd icache_limited + rm -r * + cp "${SCARAB_SRC}/PARAMS.sunny_cove" PARAMS.in + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel icache_limited --inst_limit "${INST_LIMIT}" ${OFF_FDIP}" + "${SCARAB}" --frontend "${FRONTEND}" --kernel icache_limited --inst_limit "${INST_LIMIT}" ${OFF_FDIP} + cd .. + ;; + + --mem_bandwidth_limited_1FU) + mkdir mem_band_1fu + cd mem_band_1fu + rm -r * + cp "${SCARAB_SRC}/PARAMS.sunny_cove" PARAMS.in + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_1FU}"" + "${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_1FU}" + cd .. + ;; + + --mem_bandwidth_limited_2FU) + mkdir mem_band_2fu + cd mem_band_2fu + rm -r * + cp "${SCARAB_SRC}/PARAMS.sunny_cove" PARAMS.in + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_2FU}"" + "${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_2FU}" + cd .. 
+ ;; + + --mem_bandwidth_limited_4FU) + mkdir mem_band_4fu + cd mem_band_4fu + rm -r * + cp "${SCARAB_SRC}/PARAMS.sunny_cove" PARAMS.in + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_4FU}" --dcache_read_ports 6" + "${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_4FU}" --dcache_read_ports 6 + cd .. + ;; + + --all) + for item in "${kernels[@]}" ; do + mkdir -p "./${item}" + cd "./${item}/" + rm -r * + cp "${SCARAB_SRC}/PARAMS.sunny_cove" PARAMS.in + # "${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}" + case "${item}" in + icache_limited) + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}" ${OFF_FDIP}" + "${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}" ${OFF_FDIP} + ;; + + cbr_limited_20t|cbr_limited_50t|cbr_limited_80t) + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}" ${CBR_SCALE_BTB}" + "${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}" ${CBR_SCALE_BTB} + ;; + + btb_limited_full_capacity_sweep|btb_limited_assoc_sweep) + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}" ${UBR_SCALE_BTB}" + "${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}" ${UBR_SCALE_BTB} + ;; + + mem_bandwidth_limited_1FU) + mkdir mem_band_1fu + cd mem_band_1fu + rm -r * + cp "${SCARAB_SRC}/PARAMS.sunny_cove" PARAMS.in + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_1FU}" --dcache_read_ports 6" + "${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_1FU}" --dcache_read_ports 6 + cd .. 
+ ;; + + mem_bandwidth_limited_2FU) + mkdir mem_band_2fu + cd mem_band_2fu + rm -r * + cp "${SCARAB_SRC}/PARAMS.sunny_cove" PARAMS.in + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_2FU}" --dcache_read_ports 6" + "${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_2FU}" --dcache_read_ports 6 + cd .. + ;; + + mem_bandwidth_limited_4FU) + mkdir mem_band_4fu + cd mem_band_4fu + rm -r * + cp "${SCARAB_SRC}/PARAMS.sunny_cove" PARAMS.in + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_4FU}" --dcache_read_ports 6" + "${SCARAB}" --frontend "${FRONTEND}" --kernel mem_bandwidth_limited --inst_limit "${INST_LIMIT}" --fu_types "${FUTYPES_4FU}" --dcache_read_ports 6 + cd .. + ;; + + *) + echo ""${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}"" + "${SCARAB}" --frontend "${FRONTEND}" --kernel "${item}" --inst_limit "${INST_LIMIT}" + ;; + esac + cd .. + done + ;; + + *) + echo "unknown option" + exit 1 + ;; + esac +done \ No newline at end of file diff --git a/src/frontend/synthetic/sampler.cc b/src/frontend/synthetic/sampler.cc new file mode 100644 index 00000000..4c015696 --- /dev/null +++ b/src/frontend/synthetic/sampler.cc @@ -0,0 +1,152 @@ + +#include "sampler.h" + +#include +#include +/* + * Overloaded Constructor that generates based on + * value_range : number of discrete values in the sequence(normal_random) or sequence_length(uniform sequences). 
+ * start_val : is the beginning element ofthe sequence + * stride : stride between elements in the sequence + * periodicity : number of sequence duplicates + * strategy : suggests the pattern of the sequence + */ + +Sampler::Sampler(Sequence_Pick_Strategy strategy, uns64 value_range, uns64 startval, uns64 stride, uns periodicity, + std::vector user_seq) + : strategy(strategy), next_pick_index(0) { + switch (strategy) { + case UNIFORM_SEQUENTIAL: + default: // <- default + sequence_vector.resize(value_range); + // cr eate sequence + for (uns64 i{0}; i < value_range; i++) { + sequence_vector[i] = startval + i * stride; + } + Sampler::scale_periodicity(periodicity); + break; + + case UNIFORM_RANDOM: + sequence_vector.resize(value_range); + // create sequence + for (uns64 i{0}; i < value_range; i++) { + sequence_vector[i] = startval + i * stride; + }; + // shuffle + std::shuffle(sequence_vector.begin(), sequence_vector.end(), rng_engine); + Sampler::scale_periodicity(periodicity); + break; + + case NORMAL_RANDOM: { + uns64 H = 90; // scaling factor + uns64 mu = value_range / 2; // center + uns64 sigma = value_range / 6; // spread + + std::vector discrete_weights(value_range); // weights of discrete + // values + std::vector discrete_values(value_range); // set of distinct values going to be in the sequence + + // generate the set of discrete values + for (uns64 i{0}; i < value_range; i++) { + discrete_values[i] = startval + i * stride; + }; + + // generate weights for each discrete value + for (uns64 i = 0; i < value_range; i++) + discrete_weights[i] = static_cast(std::round(H * std::exp(-0.5 * std::pow((i - mu) / sigma, 2)))); + + // duplicate discrete values according to their weights + for (uns64 i = 0; i < value_range; i++) { + for (uns64 j = 0; j < discrete_weights[i]; j++) { + sequence_vector.push_back(discrete_values[i]); + } + } + // shuffle distribution + std::shuffle(sequence_vector.begin(), sequence_vector.end(), rng_engine); + 
Sampler::scale_periodicity(periodicity); + break; + } + + case USER_DEFINED: + sequence_vector = user_seq; + Sampler::scale_periodicity(periodicity); + break; + } +} + +/* + * Overloaded Constructor receives a userdefined sequence set of discreete + values and constructs a sequence based on selection strategy. + * discrete_values : vecctor of distinct values that are going to be in the + sequence. + * discrete_weights : corresponding weights of distinct values, by default all + are equally likely, ie; = 1. + * sequence_length : specifies length of the discrete distribution to be + generated. + * periodicity : number of sequence duplicates + * strategy : suggests the pattern of the sequence + NB: when strategy is UNIFORM_*, the discrete_values become the actual sequence + to be consumed, if NORMAL_RANDOM we generate a discrete sequence based on the + dicrete values and their corresponding weigths + */ + +Sampler::Sampler(std::vector discrete_values, Sequence_Pick_Strategy strategy, uns64 periodicity, + uns sequence_length, std::vector discrete_weights = {}) + : strategy(strategy), next_pick_index(0) { + assert(!discrete_values.empty() && "sequence cannot be empty"); + switch (strategy) { + case UNIFORM_SEQUENTIAL: + case USER_DEFINED: + default: // <- default + sequence_vector = discrete_values; + Sampler::scale_periodicity(periodicity); + break; + case UNIFORM_RANDOM: + sequence_vector = discrete_values; + std::shuffle(sequence_vector.begin(), sequence_vector.end(), rng_engine); + Sampler::scale_periodicity(periodicity); + break; + case NORMAL_RANDOM: + if (discrete_weights.empty()) { + // if user didnt pass weights, every unique sample has equal weight + discrete_weights = std::vector(discrete_values.size(), 1); + } + // RNG based on discrete weights + std::discrete_distribution discrete_dist(discrete_weights.begin(), discrete_weights.end()); + + for (size_t i{0}; i < sequence_length; i++) { + auto idx = discrete_dist(rng_engine); + assert(idx < 
discrete_values.size() && "distribution exceeds index"); + sequence_vector.push_back(discrete_values[idx]); + } + Sampler::scale_periodicity(periodicity); + break; + } +} + +uns64 Sampler::get_next_element() { + auto index{next_pick_index}; + if (++next_pick_index >= sequence_vector.size()) + next_pick_index = 0; + return sequence_vector[index]; +} +// seed +static std::random_device seed; +// Define and initialize outside the class +std::default_random_engine Sampler::rng_engine(seed()); + +void Sampler::scale_periodicity(uns64 periodicity) { + auto original_vector = Sampler::sequence_vector; + sequence_vector.reserve(sequence_vector.size() * periodicity); + for (uns i = 1; i < periodicity; i++) { + sequence_vector.insert(sequence_vector.end(), original_vector.begin(), original_vector.end()); + } +} + +uns64 Sampler::peek_element_following_next() const { + auto index = (next_pick_index + 1) % sequence_vector.size(); + if (index < next_pick_index) + return sequence_vector.back(); + else + return sequence_vector[index]; +} \ No newline at end of file diff --git a/src/frontend/synthetic/sampler.h b/src/frontend/synthetic/sampler.h new file mode 100644 index 00000000..f2c7b67e --- /dev/null +++ b/src/frontend/synthetic/sampler.h @@ -0,0 +1,44 @@ +#ifndef SAMPLER_H +#define SAMPLER_H + +#include +#include +#include + +enum Sequence_Pick_Strategy { + UNIFORM_SEQUENTIAL, // pick sequentially + UNIFORM_RANDOM, // pick sequentially but sequence vector must be shuffled + NORMAL_RANDOM, // use the formula + USER_DEFINED // user supplied sequence +}; + +class Sampler { + std::vector sequence_vector; + Sequence_Pick_Strategy strategy; + uns64 next_pick_index; + void scale_periodicity(uns64 periodicity); + + public: + // generates the sequence accorfing to stride, periodicity and/or shuffles based on strategy + Sampler(Sequence_Pick_Strategy strategy, uns64 value_range, uns64 startval, uns64 stride, uns periodicity = 1, + std::vector user_seq = {}); + + /* user supplied 
sequence that is either shuffled, duplicated based on stride or used to create a discrete + distribution based on weights */ + Sampler(std::vector discrete_values, Sequence_Pick_Strategy strategy, uns64 periodicity, uns sequence_length, + std::vector discrete_weights); + + // get next element + uns64 get_next_element(); + + // peek element 2 indicies away. Useful for CF workloads, does not progress the element pointer. + uns64 peek_element_following_next() const; + + // CF workloads need to know what the last target is, to keep workload bounded + uns64 get_last_element() const { return sequence_vector.back(); } + + // RANDOM NUMBER GENERATOR + static std::default_random_engine rng_engine; +}; + +#endif \ No newline at end of file diff --git a/src/frontend/synthetic/synth_fe.cc b/src/frontend/synthetic/synth_fe.cc new file mode 100644 index 00000000..8bcc9360 --- /dev/null +++ b/src/frontend/synthetic/synth_fe.cc @@ -0,0 +1,125 @@ +extern "C" { +#include "globals/assert.h" +#include "globals/global_defs.h" +#include "globals/global_types.h" +#include "globals/global_vars.h" +#include "globals/utils.h" + +#include "debug/debug.param.h" +#include "debug/debug_macros.h" +#include "debug/debug_print.h" + +#include "bp/bp.param.h" +#include "memory/memory.param.h" +} +#include + +#include "bp/bp.h" +#include "frontend/synthetic/synth_fe.h" +#include "frontend/synthetic/synthetic_kernels.h" +#include "pin/pin_lib/uop_generator.h" + +#include "ctype_pin_inst.h" +#include "kernel_params.h" +// #define PRINT_INSTRUCTION_INFO +#define DEBUG(proc_id, args...) 
_DEBUG(proc_id, DEBUG_SYNTHETIC_INST, ##args) + +/* intrinsic frontend variables */ +static ctype_pin_inst next_onpath_pi[MAX_NUM_PROCS]; +static ctype_pin_inst next_offpath_pi[MAX_NUM_PROCS][MAX_NUM_BPS]; +static bool off_path_mode[MAX_NUM_PROCS][MAX_NUM_BPS] = {false}; +static uint64_t off_path_addr[MAX_NUM_PROCS][MAX_NUM_BPS] = {0}; + +void synth_init() { + kernel = static_cast(KERNEL); + uop_generator_init(NUM_CORES); + synthetic_kernel_init(); + + for (uns proc_id{0}; proc_id < NUM_CORES; proc_id++) { + next_onpath_pi[proc_id] = synthetic_fe_generate_next(proc_id, false); + } +} + +void synth_done() { +} + +Addr synth_next_fetch_addr(uns proc_id) { + return next_onpath_pi[proc_id].instruction_addr; +} + +Flag synth_can_fetch_op(uns proc_id, uns bp_id) { + return !(uop_generator_get_eom(proc_id) && trace_read_done[proc_id]); +} + +void synth_fetch_op(uns proc_id, uns bp_id, struct Op_struct* op) { + bool off_path_mode_ = off_path_mode[proc_id][bp_id]; + // uns64 off_path_addr_ = off_path_addr[proc_id][bp_id]; + ctype_pin_inst* next_offpath_pi_ = &next_offpath_pi[proc_id][bp_id]; + + if (uop_generator_get_bom(proc_id)) { + if (!off_path_mode_) { + uop_generator_get_uop(proc_id, op, &next_onpath_pi[proc_id]); + } else { + uop_generator_get_uop(proc_id, op, next_offpath_pi_); + } +#ifdef PRINT_INSTRUCTION_INFO + ctype_pin_inst next_pi = off_path_mode_ ? 
*next_offpath_pi_ : next_onpath_pi[proc_id]; + std::cout << disasm_op(op, TRUE) << ": ip " << next_pi.instruction_addr << " Next " << next_pi.instruction_next_addr + << " size " << (uint32_t)next_pi.size << " target " << next_pi.branch_target << " size " + << (uint32_t)next_pi.size << " taken " << (uint32_t)next_pi.actually_taken << " uid " << next_pi.inst_uid + << " uid " << next_pi.inst_uid << " mem_addr " << next_pi.ld_vaddr[0] << std::endl; +#endif + } else { + uop_generator_get_uop(proc_id, op, NULL); + } + + if (uop_generator_get_eom(proc_id)) { + if (!off_path_mode_) { + next_onpath_pi[proc_id] = synthetic_fe_generate_next(proc_id, off_path_mode_); + } else { + *next_offpath_pi_ = synthetic_fe_generate_next(proc_id, off_path_mode_); + } + } +} + +void synth_redirect(uns proc_id, uns bp_id, uns64 inst_uid, Addr fetch_addr) { + if (!bp_id) + ASSERT(proc_id, fetch_addr); + if (!fetch_addr) + off_path_mode[proc_id][bp_id] = false; + else + off_path_mode[proc_id][bp_id] = true; + off_path_addr[proc_id][bp_id] = fetch_addr; + // synthetic kernel manages PCs internally using synth_fe_curr_pc variable + // on redirect we modify synth_fe_curr_pc accordingly + synth_fe_curr_pc = off_path_addr[proc_id][bp_id]; + next_offpath_pi[proc_id][bp_id] = synthetic_fe_generate_next(proc_id, off_path_mode[proc_id][bp_id]); +#ifdef PRINT_INSTRUCTION_INFO + std::cout << " Redirect happened here, pred addr is " << fetch_addr << std::endl; +#endif + DEBUG(proc_id, "Redirect on-path:%lx off-path:%lx", next_onpath_pi[proc_id].instruction_addr, + next_offpath_pi[proc_id][bp_id].instruction_addr); +} + +void synth_recover(uns proc_id, uns bp_id, uns64 inst_uid) { + Op dummy_op; + if (bp_id) { + off_path_addr[proc_id][bp_id] = 0; + memset(&next_offpath_pi[proc_id][bp_id], 0, sizeof(next_offpath_pi[proc_id][bp_id])); + } else + ASSERT(proc_id, off_path_mode[proc_id][bp_id]); + off_path_mode[proc_id][bp_id] = false; + // Finish decoding of the current off-path inst before switching to on-path + 
while (!uop_generator_get_eom(proc_id)) { + uop_generator_get_uop(proc_id, &dummy_op, &next_offpath_pi[proc_id][bp_id]); + } + // restore synthetic frontend pc using the state that was stored before redirect + synth_fe_curr_pc = next_onpath_pi[proc_id].instruction_next_addr; +#ifdef PRINT_INSTRUCTION_INFO + std::cout << " Recover happened here " << std::endl; +#endif + DEBUG(proc_id, "Recover CF:%lx ", next_onpath_pi[proc_id].instruction_addr); +} + +void synth_retire(uns proc_id, uns64 inst_uid) { +} diff --git a/src/frontend/synthetic/synth_fe.h b/src/frontend/synthetic/synth_fe.h new file mode 100644 index 00000000..b2f1ec46 --- /dev/null +++ b/src/frontend/synthetic/synth_fe.h @@ -0,0 +1,24 @@ +#ifndef __SYNTH_FE_H__ +#define __SYNTH_FE_H__ +#include "globals/global_types.h" +#ifdef __cplusplus +extern "C" { +#endif +struct Op_struct; + +void synth_init(); +void synth_done(); + +/*Front End Interface*/ +Addr synth_next_fetch_addr(uns proc_id); +Flag synth_can_fetch_op(uns proc_id, uns bp_id); +void synth_fetch_op(uns proc_id, uns bp_id, struct Op_struct* op); +void synth_redirect(uns proc_id, uns bp_id, uns64 inst_uid, Addr fetch_addr); +void synth_recover(uns proc_id, uns bp_id, uns64 inst_uid); +void synth_retire(uns proc_id, uns64 inst_uid); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/src/frontend/synthetic/synthetic_basic_apis.cc b/src/frontend/synthetic/synthetic_basic_apis.cc new file mode 100644 index 00000000..8e348a6f --- /dev/null +++ b/src/frontend/synthetic/synthetic_basic_apis.cc @@ -0,0 +1,109 @@ +#include "isa/isa.h" + +#include "ctype_pin_inst.h" + +/* Basic one line instruction APIs */ +ctype_pin_inst generate_generic_load(uns64 ip, uns64 uid, uns64 vaddr, uns8 inst_size, uns8 ld_addr_reg, + uns8 dest_reg) { + ctype_pin_inst inst; + memset(&inst, 0, sizeof(inst)); + inst.inst_uid = uid; + inst.instruction_addr = ip; + inst.instruction_next_addr = ip + inst_size; + inst.size = inst_size; + inst.op_type = OP_ILD; + 
strcpy(inst.pin_iclass, "DUMMY_LOAD_DC"); + inst.num_simd_lanes = 1; + inst.lane_width_bytes = 1; + inst.is_move = 1; + inst.num_ld1_addr_regs = 1; + inst.ld1_addr_regs[0] = ld_addr_reg; + inst.ld_vaddr[0] = vaddr; + inst.num_dst_regs = 1; + inst.dst_regs[0] = dest_reg; + inst.num_ld = 1; + inst.ld_size = 8; + return inst; +} + +ctype_pin_inst generate_alu_type_inst(uns64 ip, uns64 uid, uns8 inst_size, uns8 regDest, uns8 regSrc1, uns8 regSrc2) { + ctype_pin_inst inst; + memset(&inst, 0, sizeof(inst)); + inst.inst_uid = uid; + inst.instruction_addr = ip; + inst.instruction_next_addr = ip + inst_size; + inst.size = inst_size; + inst.op_type = OP_IADD; + strcpy(inst.pin_iclass, "DUMMY_IADD"); + inst.num_simd_lanes = 1; + inst.lane_width_bytes = 1; + inst.num_src_regs = 2; + inst.num_dst_regs = 1; + inst.src_regs[0] = regSrc1; + inst.src_regs[1] = regSrc2; + inst.dst_regs[0] = regDest; + return inst; +} + +ctype_pin_inst generate_conditional_branch(uns64 ip, uns64 uid, uns64 tgtAddr, bool direction, uns8 inst_size) { + ctype_pin_inst inst; + memset(&inst, 0, sizeof(inst)); + inst.inst_uid = uid; + inst.instruction_addr = ip; + inst.instruction_next_addr = direction ? tgtAddr : (ip + inst_size); + inst.size = inst_size; + inst.op_type = OP_CF; + inst.cf_type = CF_CBR; + inst.num_simd_lanes = 1; + inst.lane_width_bytes = 1; + inst.branch_target = tgtAddr; + inst.actually_taken = direction ? 
TAKEN : NOT_TAKEN; + strcpy(inst.pin_iclass, "DUMMY_CBR_JMP"); + return inst; +} + +ctype_pin_inst generate_unconditional_branch(uns64 ip, uns64 uid, uns64 tgt, uns8 inst_size) { + ctype_pin_inst inst; + memset(&inst, 0, sizeof(inst)); + inst.instruction_addr = ip; + inst.inst_uid = uid; + inst.instruction_next_addr = tgt; + inst.size = inst_size; + inst.op_type = OP_CF; + inst.cf_type = CF_BR; + inst.num_simd_lanes = 1; + inst.lane_width_bytes = 1; + inst.branch_target = tgt; + inst.actually_taken = TAKEN; + strcpy(inst.pin_iclass, "DUMMY_UBR_JMP"); + return inst; +} + +ctype_pin_inst generate_indirect_branch(uns64 ip, uns64 uid, uns64 tgtAddr, uns64 vaddr, uns8 inst_size) { + ctype_pin_inst inst; + memset(&inst, 0, sizeof(inst)); + inst.instruction_addr = ip; + inst.inst_uid = uid; + inst.instruction_next_addr = tgtAddr; + inst.size = inst_size; + inst.op_type = OP_CF; + inst.cf_type = CF_IBR; + inst.num_simd_lanes = 1; + inst.lane_width_bytes = 1; + inst.branch_target = tgtAddr; + inst.actually_taken = 1; + inst.num_ld1_addr_regs = 1; + inst.ld1_addr_regs[0] = REG_RAX; + inst.ld_vaddr[0] = vaddr; + strcpy(inst.pin_iclass, "DUMMY_IBR_JUMP"); + return inst; +} + +ctype_pin_inst generate_nop(uns64 ip, uns64 uid, uns64 inst_size, bool fake) { + ctype_pin_inst inst = create_dummy_nop(ip, WPNM_NOT_IN_WPNM); + inst.size = inst_size; + inst.instruction_next_addr = ip + inst_size; + inst.inst_uid = uid; + inst.fake_inst = fake ? 
0 : 1; + return inst; +} \ No newline at end of file diff --git a/src/frontend/synthetic/synthetic_bottleneck_kernels.cc b/src/frontend/synthetic/synthetic_bottleneck_kernels.cc new file mode 100644 index 00000000..36ee98da --- /dev/null +++ b/src/frontend/synthetic/synthetic_bottleneck_kernels.cc @@ -0,0 +1,311 @@ +#include +#include + +#include "bp/bp.param.h" +#include "memory/memory.param.h" + +#include "frontend/synthetic/synthetic_kernels.h" +#include "isa/isa.h" + +// pad length is the number of off-path insts to be padded to the backward branch at the tail end of every kernel +#define PAD_LENGTH 300 + +/* Helper Functions For Microkernels */ + +// Function to generate leading nops for CF workloads +uns64 gen_issue_width_lock_nops(std::map& kernel_map, Sampler& uid_sequence, uns num_of_nops, + uns64 starting_pc) { + Sampler nop_pcs(UNIFORM_SEQUENTIAL, ISSUE_WIDTH, starting_pc, NOP_SIZE, 1); + + for (uns i{0}; i < num_of_nops; i++) { + auto current_pc{nop_pcs.get_next_element()}; + kernel_map.insert({current_pc, generate_nop(current_pc, uid_sequence.get_next_element(), NOP_SIZE, false)}); + } + return nop_pcs.get_next_element(); +} + +/* Microkernel Definitions */ + +// CBR +std::map generate_cbr_kernel(Sequence_Pick_Strategy branch_direction_pick_strategy, + Sequence_Pick_Strategy branch_target_pick_strategy, + uns64 target_pool_size, double branch_t_nt_ratio, + uns64 workload_length, uns64 start_pc, uns64 start_uid) { + Sampler uid_sequence(UNIFORM_SEQUENTIAL, ((2 * target_pool_size) + PAD_LENGTH), start_uid, 1, 1); + + // distribution for every possibe pc; onpath+offpath + Sampler combined_targets_pool(branch_target_pick_strategy, ((2 * target_pool_size) + PAD_LENGTH), start_pc, + (ICACHE_LINE_SIZE), 1); + + // distribution for only taken targets + const uint target_stride = (2 * ICACHE_LINE_SIZE); + const uint starting_target = (start_pc + 2 * ICACHE_LINE_SIZE); + Sampler targets_pool(branch_target_pick_strategy, target_pool_size, starting_target, 
target_stride, 1); + + // branch direction distribution for both onpath and offpath branches + uns64 taken_ratio = static_cast(branch_t_nt_ratio * 100); + uns64 not_Taken_ratio = 100 - taken_ratio; + Sampler direction_sequence({0, 1}, branch_direction_pick_strategy, 1, ((2 * target_pool_size) + PAD_LENGTH), + {taken_ratio, not_Taken_ratio}); + + std::map kernel_map; + uns64 current_pc{start_pc}, _target{0}; + ctype_pin_inst next_inst; + + // Generate insts for every possible pc, includes possible offpath insts + for (uns i{0}; i < (2 * target_pool_size + PAD_LENGTH); i++) { + // generate leading nops and return next pc + current_pc = gen_issue_width_lock_nops(kernel_map, uid_sequence, ISSUE_WIDTH - 1, current_pc); + auto current_uid = uid_sequence.get_next_element(); + + // every taken-target is 2 cachelines away. + _target = combined_targets_pool.peek_element_following_next(); + next_inst = generate_conditional_branch(current_pc, current_uid, _target, direction_sequence.get_next_element(), + BRANCH_SIZE); + + // if the generated CBR's target goes out of range of target_pool, overwrite with unconditional branch to go back to + // beginning + if (next_inst.instruction_next_addr >= targets_pool.get_last_element()) + next_inst = generate_unconditional_branch(current_pc, current_uid, START_PC, BRANCH_SIZE); + + kernel_map.insert({current_pc, next_inst}); + // set up for next possible pc + current_pc = combined_targets_pool.get_next_element(); + } + + return kernel_map; +} + +// UBR +std::map generate_ubr_kernel(Sequence_Pick_Strategy branch_target_pick_strategy, + uns64 target_pool_size, uns64 workload_length, uns64 start_pc, + uns64 start_uid, uns64 starting_target, uns64 target_stride) { + assert(target_pool_size <= workload_length && "workload_length must be less than or equal to target_pool size "); + + Sampler uid_sequence(UNIFORM_SEQUENTIAL, (2 * target_pool_size + PAD_LENGTH), start_uid, 1, 1); + + // distribution for offpath+onpath targets + Sampler 
combined_target_pool(branch_target_pick_strategy, (2 * target_pool_size + PAD_LENGTH), start_pc, + ICACHE_LINE_SIZE, 1); + + Sampler targets_pool(branch_target_pick_strategy, target_pool_size, starting_target, target_stride, 1); + + std::map kernel_map; + ctype_pin_inst next_inst; + + uns64 current_pc{start_pc}, current_uid{0}; + for (uns i{0}; i < ((2 * target_pool_size) + PAD_LENGTH); i++) { + // generate leading nops + current_pc = gen_issue_width_lock_nops(kernel_map, uid_sequence, ISSUE_WIDTH - 1, current_pc); + current_uid = uid_sequence.get_next_element(); + + // every taken target is 2 cachelines away + uns64 next_target = combined_target_pool.peek_element_following_next(); + + // if the generated target exceeds the taken targets distribution we set next target to beginning pc + if (next_target >= targets_pool.get_last_element()) + next_target = start_pc; + + next_inst = generate_unconditional_branch(current_pc, current_uid, next_target, BRANCH_SIZE); + kernel_map.insert({current_pc, next_inst}); + // setup for next possibe pc + current_pc = combined_target_pool.get_next_element(); + } + return kernel_map; +} + +// IBR +std::map generate_ibr_kernel(Sequence_Pick_Strategy branch_target_pick_strategy, + uns64 target_pool_size, uns64 start_pc, uns64 start_uid, + uns64 target_stride, uns64 starting_target) { + Sampler uid_sequence(UNIFORM_SEQUENTIAL, (2 * target_pool_size + PAD_LENGTH), start_uid, 1, 1); + + Sampler targets_pool(branch_target_pick_strategy, target_pool_size, starting_target, target_stride, 1); + + Sampler combined_target_pool(UNIFORM_SEQUENTIAL, (2 * target_pool_size + PAD_LENGTH), start_pc, ICACHE_LINE_SIZE, 1); + // fixed mem address + static std::uniform_int_distribution uns64_dist{1, 0x00007fffffffffff}; + static uns64 memaddress = uns64_dist(Sampler::rng_engine); + std::map kernel_map; + ctype_pin_inst next_inst; + uns64 current_pc{start_pc}, current_uid{0}; + + for (uns i{0}; i < (2 * target_pool_size) + PAD_LENGTH; i++) { + // generate 
leading nops + current_pc = gen_issue_width_lock_nops(kernel_map, uid_sequence, ISSUE_WIDTH - 1, current_pc); + current_uid = uid_sequence.get_next_element(); + + // the target of every branch is 2 cachelines away, for Round Robin + uns64 next_target = combined_target_pool.peek_element_following_next(); + + // for round robin ibr, if we exhaust our targets we go back to beginning + if (next_target >= targets_pool.get_last_element()) + next_target = start_pc; + + // for random IBR the target is next element from the random distribution + if (branch_target_pick_strategy == UNIFORM_RANDOM) + next_target = targets_pool.get_next_element(); + + next_inst = generate_indirect_branch(current_pc, current_uid, next_target, memaddress, BRANCH_SIZE); + kernel_map.insert({current_pc, next_inst}); + // setup for next possibe pc + current_pc = combined_target_pool.get_next_element(); + } + + return kernel_map; +} + +// ILP +std::map generate_ilp_kernel(uns dependence_chain_length, uns workload_length, uns64 start_pc, + uns64 start_uid) { + Sampler uid_sequence(UNIFORM_SEQUENTIAL, (workload_length + (3 * PAD_LENGTH)), start_uid, 1, 1); + Sampler pc_sequence(UNIFORM_SEQUENTIAL, (workload_length + (3 * PAD_LENGTH)), start_pc, ALU_ADD_SIZE, 1); + + if (dependence_chain_length != 0) + assert((workload_length % dependence_chain_length) == 0 && + "workload_length must be a multiple of dependence chain length"); + std::map kernel_map; + + // zero dependence chain length means no carried loop dependence + if (dependence_chain_length == 0) { + for (uns i{0}; i < (workload_length + (3 * PAD_LENGTH)); i++) { + auto current_pc{pc_sequence.get_next_element()}; + + // if i=workload_length insert branch back to beginning. 
The following iterations are offpath pads for the branch + if (i == workload_length) { + kernel_map.insert({current_pc, generate_unconditional_branch(current_pc, uid_sequence.get_next_element(), + START_PC, ALU_ADD_SIZE)}); + continue; + } + kernel_map.insert( + {current_pc, generate_alu_type_inst(current_pc, uid_sequence.get_next_element(), ALU_ADD_SIZE, 1, 2, 3)}); + + // append unconditional branch to end of the kernel + } + } else { + for (uns i{0}; i < ((workload_length / dependence_chain_length) + (3 * PAD_LENGTH)); i++) { + for (uns j{0}; j < dependence_chain_length; j++) { + auto current_pc{pc_sequence.get_next_element()}; + if ((i * dependence_chain_length) + j == workload_length) { + kernel_map.insert({current_pc, generate_unconditional_branch(current_pc, uid_sequence.get_next_element(), + START_PC, ALU_ADD_SIZE)}); + continue; + } + kernel_map.insert({current_pc, generate_alu_type_inst(current_pc, uid_sequence.get_next_element(), ALU_ADD_SIZE, + j + 1, j + 1, j + 1)}); + } + } + } + + return kernel_map; +} + +// LOAD +std::map generate_load_kernel(Load_Kernel_Type type, uns workload_length, + Sequence_Pick_Strategy mem_address_pick_srategy, + uns64 start_mem_address, uns64 mem_addresses_stride, + Limit_Load_To level, uns64 start_pc, uns64 start_uid) { + // stride should be enough to cause hits at a level but misses in the precceding levels if any + uns64 stride = [&]() -> uns64 { + switch (level) { + case DCACHE_LEVEL: { + return mem_addresses_stride; + }; + case MLC_LEVEL: { + return DCACHE_SIZE / (DCACHE_ASSOC); + }; + case LLC_LEVEL: { + return MLC_SIZE / (MLC_ASSOC); + }; + case MEM_LEVEL: { + return L1_SIZE / L1_ASSOC; + } + default: + return mem_addresses_stride; + } + }(); + + // The distribution size is the number of accesses that will fully replace a cache_line of a preceeding level + // Beyond the dcache_limited workload, we cause conflict misses for each preceeding level, simplifies things + uns64 distribution_size = [&]() -> uns64 { + switch 
(level) { + case DCACHE_LEVEL: { + return workload_length; + }; + case MLC_LEVEL: { + return 2 * DCACHE_ASSOC; + }; + + case LLC_LEVEL: { + return 2 * MLC_ASSOC; + }; + + case MEM_LEVEL: { + return 2 * L1_ASSOC; + }; + + default: + return workload_length; + } + }(); + + Sampler uid_sequence(UNIFORM_SEQUENTIAL, (workload_length + PAD_LENGTH), start_uid, 1, 1); + + Sampler mem_address_sequence(mem_address_pick_srategy, distribution_size, start_mem_address, stride, 1); + + Sampler pc_sequence(UNIFORM_SEQUENTIAL, (workload_length + PAD_LENGTH), start_pc, LOAD_INST_SIZE, 1); + + std::map kernel_map; + + for (uns i{0}; i < (workload_length + PAD_LENGTH); i++) { + auto current_pc{pc_sequence.get_next_element()}; + + if (i == workload_length) { + // append unconditional branch to end of kernel + kernel_map.insert({current_pc, generate_unconditional_branch(current_pc, uid_sequence.get_next_element(), + START_PC, LOAD_INST_SIZE)}); + continue; + } + auto mem_addr = mem_address_sequence.get_next_element(); + switch (type) { + // generate load + case DEPENDENCE_CHAIN: { + kernel_map.insert({current_pc, generate_generic_load(current_pc, uid_sequence.get_next_element(), mem_addr, + LOAD_INST_SIZE, Reg_Id::REG_RAX, Reg_Id::REG_RAX)}); + break; + } + + case NO_DEPENDENCE_CHAIN: { + kernel_map.insert({current_pc, generate_generic_load(current_pc, uid_sequence.get_next_element(), mem_addr, + LOAD_INST_SIZE, Reg_Id::REG_RAX, Reg_Id::REG_RBX)}); + break; + } + default: + break; + } + } + return kernel_map; +} + +// ICACHE +std::map generate_icache_kernel(uns64 start_pc, uns64 start_uid) { + // generate 2*ICACHE_DEPTH worth of instructions, so entries are always replaced + uns64 workload_length = 2 * (ICACHE_SIZE / ICACHE_LINE_SIZE); + Sampler pc_sequence(UNIFORM_SEQUENTIAL, (workload_length + PAD_LENGTH), start_pc, ICACHE_LINE_SIZE, 1); + + Sampler uid_sequence(UNIFORM_SEQUENTIAL, (workload_length + PAD_LENGTH), start_uid, 1, 1); + std::map kernel_map; + for (uns64 i{0}; i < 
workload_length + PAD_LENGTH; i++) { + auto current_pc{pc_sequence.get_next_element()}; + if (i == workload_length) { + kernel_map.insert({current_pc, generate_unconditional_branch(current_pc, uid_sequence.get_next_element(), + START_PC, ICACHE_LINE_SIZE)}); + continue; + } + + kernel_map.insert( + {current_pc, generate_alu_type_inst(current_pc, uid_sequence.get_next_element(), ICACHE_LINE_SIZE, 1, 2, 3)}); + } + + return kernel_map; +} \ No newline at end of file diff --git a/src/frontend/synthetic/synthetic_dispatcher.cc b/src/frontend/synthetic/synthetic_dispatcher.cc new file mode 100644 index 00000000..cc9f82a0 --- /dev/null +++ b/src/frontend/synthetic/synthetic_dispatcher.cc @@ -0,0 +1,200 @@ +#include +#include + +#include "bp/bp.param.h" +#include "memory/memory.param.h" + +#include "frontend/synthetic/kernel_params.h" +#include "frontend/synthetic/synthetic_kernels.h" + +/* static globals */ +uns64 synth_fe_curr_pc{START_PC}; +uns64 synth_fe_curr_uid{UID_START}; + +/* Bottleneck name strings */ +const char* kernel_names[] = { +#define KERNEL_IMPL(id, name) name, +#include "kernel_table.def" +#undef KERNEL_IMPL + "invalid"}; + +Kernel_Enum kernel; + +/* Dispatcher helper Prototypes */ +ctype_pin_inst get_next_kernel_inst(const std::map& kernel_map); +ctype_pin_inst get_next_mem_latency_kernel_type_inst(uns proc_id, Limit_Load_To load_level); +ctype_pin_inst get_next_cbr_kernel_type_inst(uns proc_id, bool offpath, double t_nt_ratio); +ctype_pin_inst get_next_ubr_kernel_type_inst(uns proc_id, bool offpath, uns64 workload_length, uns64 target_pool_size, + uns64 starting_target, uns64 target_stride); +ctype_pin_inst get_next_ibr_kernel_type_inst(uns proc_id, bool offpath, uns64 target_stride, uns64 num_of_targets, + Sequence_Pick_Strategy target_strategy); +ctype_pin_inst get_next_ilp_kernel_type_inst(uns proc_id, uns dependence_chain_length); + +void synthetic_kernel_init() { + std::cout << "Simulating " << kernel_names[KERNEL] << " synthetic kernel" << 
std::endl; +} + +/* Kernel Dispatcher */ +ctype_pin_inst synthetic_fe_generate_next(uns proc_id, bool offpath) { + switch (kernel) { + case MEM_BANDWIDTH_LIMITED: { + static auto kernel_map{generate_load_kernel(NO_DEPENDENCE_CHAIN, 500, UNIFORM_SEQUENTIAL, (ICACHE_LINE_SIZE), 4, + DCACHE_LEVEL, START_PC, UID_START)}; + // get next inst + return get_next_kernel_inst(kernel_map); + } + + case DCACHE_LIMITED: + return get_next_mem_latency_kernel_type_inst(proc_id, DCACHE_LEVEL); + + case MLC_LIMITED: + return get_next_mem_latency_kernel_type_inst(proc_id, MLC_LEVEL); + + case LLC_LIMITED: + return get_next_mem_latency_kernel_type_inst(proc_id, LLC_LEVEL); + + case MEM_LIMITED: + return get_next_mem_latency_kernel_type_inst(proc_id, MEM_LEVEL); + + case CBR_LIMITED_20T: + return get_next_cbr_kernel_type_inst(proc_id, offpath, 0.2); + + case CBR_LIMITED_50T: + return get_next_cbr_kernel_type_inst(proc_id, offpath, 0.5); + + case CBR_LIMITED_80T: + return get_next_cbr_kernel_type_inst(proc_id, offpath, 0.8); + + case BTB_LIMITED_FULL_ASSOC_SWEEP: { + const uns64 target_pool_size = BTB_ASSOC + 1; + const uns64 workload_length = BTB_ASSOC + 1; + const uns64 target_Stride = BTB_ENTRIES; + const uns64 starting_target = (START_PC + BTB_ENTRIES); + + return get_next_ubr_kernel_type_inst(proc_id, offpath, workload_length, target_pool_size, starting_target, + target_Stride); + } + + case BTB_LIMITED_FULL_CAPACITY_SWEEP: { + const uns64 target_pool_size = BTB_ENTRIES + 1; + const uns64 workload_length = BTB_ENTRIES + 1; + const uns64 target_Stride = (2 * ICACHE_LINE_SIZE); + const uns64 starting_target = (START_PC + 2 * ICACHE_LINE_SIZE); + return get_next_ubr_kernel_type_inst(proc_id, offpath, workload_length, target_pool_size, starting_target, + target_Stride); + } + + case IBR_LIMITED_ROUNDROBIN_4TGTS: { + const uns target_pool_size{4}; + const uns64 target_stride{2 * ICACHE_LINE_SIZE}; + return get_next_ibr_kernel_type_inst(proc_id, offpath, target_stride, target_pool_size, 
UNIFORM_SEQUENTIAL); + } + + case IBR_LIMITED_Random_2TGTS: { + const uns target_pool_size{2}; + const uns64 target_stride{2 * ICACHE_LINE_SIZE}; + return get_next_ibr_kernel_type_inst(proc_id, offpath, target_stride, target_pool_size, UNIFORM_RANDOM); + } + + case IBR_LIMITED_RANDOM_4TGTS: { + const uns target_pool_size{4}; + const uns64 target_stride{2 * ICACHE_LINE_SIZE}; + return get_next_ibr_kernel_type_inst(proc_id, offpath, target_stride, target_pool_size, UNIFORM_RANDOM); + } + + case ICACHE_LIMITED: { + static auto kernel_map{generate_icache_kernel(synth_fe_curr_pc, synth_fe_curr_uid)}; + return get_next_kernel_inst(kernel_map); + } + + case ILP_LIMITED_1_DEP_CHAIN: { + return get_next_ilp_kernel_type_inst(proc_id, 1); + } + + case ILP_LIMITED_2_DEP_CHAIN: { + return get_next_ilp_kernel_type_inst(proc_id, 2); + } + + case ILP_LIMITED_4_DEP_CHAIN: { + return get_next_ilp_kernel_type_inst(proc_id, 4); + } + + default: + return generate_nop(synth_fe_curr_pc++, synth_fe_curr_uid++, NOP_SIZE, false); + } +} + +/* Helper Definitions */ + +ctype_pin_inst get_next_kernel_inst(const std::map& kernel_map) { + auto it = kernel_map.find(synth_fe_curr_pc); + auto inst = it->second; + assert(it != kernel_map.end() && "Every inst possible should be in the map"); + synth_fe_curr_pc = inst.instruction_next_addr; + return it->second; +} + +ctype_pin_inst get_next_mem_latency_kernel_type_inst(uns proc_id, Limit_Load_To load_level) { + // generate map that contains the entire workload + static auto kernel_map{generate_load_kernel(DEPENDENCE_CHAIN, 1000, UNIFORM_SEQUENTIAL, (2 * ICACHE_LINE_SIZE), 0, + load_level, START_PC, UID_START)}; + return get_next_kernel_inst(kernel_map); +} + +ctype_pin_inst get_next_cbr_kernel_type_inst(uns proc_id, bool offpath, double t_nt_ratio) { + const uns64 workload_length = 512; + // create map that contains the entire cbr workload + static std::map kernel_map = generate_cbr_kernel( + NORMAL_RANDOM, UNIFORM_SEQUENTIAL, workload_length, 
t_nt_ratio, workload_length, START_PC, UID_START); + + auto inst = get_next_kernel_inst(kernel_map); + /* if we are at the tail end of program regenerate the kernel - this generates new random branch directions, without + this predictor learns the pattern for bias ratios other than 50/50 T/NT */ + if (!offpath && inst.instruction_next_addr == START_PC) { + kernel_map = generate_cbr_kernel(NORMAL_RANDOM, UNIFORM_SEQUENTIAL, workload_length, t_nt_ratio, workload_length, + START_PC, UID_START); + } + return inst; +} + +ctype_pin_inst get_next_ubr_kernel_type_inst(uns proc_id, bool offpath, uns64 workload_length, uns64 target_pool_size, + uns64 starting_target, uns64 target_stride) { + // create map that contains the entire btb workload + static std::map kernel_map = generate_ubr_kernel( + UNIFORM_SEQUENTIAL, target_pool_size, workload_length, START_PC, UID_START, starting_target, target_stride); + + return get_next_kernel_inst(kernel_map); +} + +ctype_pin_inst get_next_ibr_kernel_type_inst(uns proc_id, bool offpath, uns64 target_stride, uns64 num_of_targets, + Sequence_Pick_Strategy target_strategy) { + static uint _insts_executed{0}; + /* randomising the targets can be tricky if the starting pc is part of the distribution + thus the distribution should be safely away from START_PC */ + static const uns64 starting_target{(START_PC + 2 * ICACHE_LINE_SIZE)}; + + // create map that contains the entire ibr workload + static std::map kernel_map = + generate_ibr_kernel(target_strategy, num_of_targets, START_PC, UID_START, target_stride, starting_target); + + auto inst = get_next_kernel_inst(kernel_map); + + if (!offpath) + _insts_executed++; + + /* for random ibr, if the number of insts executed equals the required random targets we re-randomize targets + by generating new kernel */ + if (!offpath && (_insts_executed == num_of_targets) && target_strategy == UNIFORM_RANDOM) { + kernel_map = + generate_ibr_kernel(target_strategy, num_of_targets, START_PC, UID_START, 
target_stride, starting_target); + _insts_executed = 0; + } + return inst; +} + +ctype_pin_inst get_next_ilp_kernel_type_inst(uns proc_id, uns dependence_chain_length) { + // create map that contains the entire ilp workload + static auto kernel_map{generate_ilp_kernel(dependence_chain_length, 1200, START_PC, UID_START)}; + // return next inst + return get_next_kernel_inst(kernel_map); +} \ No newline at end of file diff --git a/src/frontend/synthetic/synthetic_kernels.h b/src/frontend/synthetic/synthetic_kernels.h new file mode 100644 index 00000000..8023e087 --- /dev/null +++ b/src/frontend/synthetic/synthetic_kernels.h @@ -0,0 +1,73 @@ +#ifndef __SYNTHETIC_KERNELS_H__ +#define __SYNTHETIC_KERNELS_H__ +#include + +#include "globals/global_types.h" + +#include "ctype_pin_inst.h" +#include "sampler.h" + +/* Definies */ + +#define NOP_SIZE ICACHE_LINE_SIZE / (ISSUE_WIDTH) +#define BRANCH_SIZE ICACHE_LINE_SIZE - (NOP_SIZE * (ISSUE_WIDTH - 1)) +#define ALU_ADD_SIZE 8 +#define LOAD_INST_SIZE 8 +#define START_PC 256 +#define UID_START 1000 + +/* static globals */ +extern uns64 synth_fe_curr_pc; +extern uns64 synth_fe_curr_uid; + +/* Enum for Kernels */ +typedef enum Load_Kernel_Type_Enum { + DEPENDENCE_CHAIN, + NO_DEPENDENCE_CHAIN +} Load_Kernel_Type; + +typedef enum Limit_Load_To_Enum { + MLC_LEVEL, + LLC_LEVEL, + MEM_LEVEL, + DCACHE_LEVEL +} Limit_Load_To; + +/* Microkernels Init Utilities */ +void synthetic_kernel_init(); + +/* Kernel dispatcher */ +ctype_pin_inst synthetic_fe_generate_next(uns proc_id, bool offpath); + +/* Basic one Line APIs */ +ctype_pin_inst generate_generic_load(uns64 ip, uns64 uid, uns64 vaddr, uns8 inst_size, uns8 ld_addr_reg, uns8 dest_reg); +ctype_pin_inst generate_alu_type_inst(uns64 ip, uns64 uid, uns8 inst_size, uns8 regDest, uns8 regSrc1, uns8 regSrc2); +ctype_pin_inst generate_conditional_branch(uns64 ip, uns64 uid, uns64 tgtAddr, bool direction, uns8 inst_size); +ctype_pin_inst generate_unconditional_branch(uns64 ip, uns64 uid, uns64 
tgt, uns8 inst_size); +ctype_pin_inst generate_indirect_branch(uns64 ip, uns64 uid, uns64 tgtAddr, uns64 vaddr, uns8 inst_size); +ctype_pin_inst generate_nop(uns64 ip, uns64 uid, uns64 inst_size, bool fake); + +/* Microkernel APIs */ +std::map generate_ubr_kernel(Sequence_Pick_Strategy branch_target_pick_strategy, + uns64 target_pool_size, uns64 workload_length, uns64 start_pc, + uns64 start_uid, uns64 starting_target, uns64 target_stride); + +std::map generate_ilp_kernel(uns dependence_chain_length, uns workload_length, uns64 start_pc, + uns64 start_uid); + +std::map generate_load_kernel(Load_Kernel_Type type, uns workload_length, + Sequence_Pick_Strategy mem_address_pick_srategy, + uns64 start_mem_address, uns64 mem_addresses_stride, + Limit_Load_To level, uns64 start_pc, uns64 start_uid); + +std::map generate_ibr_kernel(Sequence_Pick_Strategy branch_target_pick_strategy, + uns64 target_pool_size, uns64 start_pc, uns64 start_uid, + uns64 target_stride, uns64 starting_target); + +std::map generate_icache_kernel(uns64 start_pc, uns64 start_uid); + +std::map generate_cbr_kernel(Sequence_Pick_Strategy branch_direction_pick_strategy, + Sequence_Pick_Strategy branch_target_pick_strategy, + uns64 direction_pool_size, double branch_t_nt_ratio, + uns64 workload_length, uns64 start_pc, uns64 start_uid); +#endif \ No newline at end of file diff --git a/src/general.param.def b/src/general.param.def index 7cd2627b..f71258f8 100644 --- a/src/general.param.def +++ b/src/general.param.def @@ -111,4 +111,5 @@ DEF_PARAM( ignore_bar_fetch , IGNORE_BAR_FETCH , Flag , DEF_PARAM( trace_bbv_output , TRACE_BBV_OUTPUT , char* , string , NULL , ) DEF_PARAM( trace_footprint_output , TRACE_FOOTPRINT_OUTPUT , char* , string , "" , ) -DEF_PARAM( segment_instr_count , SEGMENT_INSTR_COUNT , uns64 , uns64 , 0 , ) \ No newline at end of file +DEF_PARAM( segment_instr_count , SEGMENT_INSTR_COUNT , uns64 , uns64 , 0 , ) +DEF_PARAM( kernel , KERNEL , uns , kernel , ILP_LIMITED_1_DEP_CHAIN, ) \ No 
newline at end of file diff --git a/src/param_parser.c b/src/param_parser.c index f126bc0e..c6871885 100644 --- a/src/param_parser.c +++ b/src/param_parser.c @@ -54,6 +54,7 @@ the program. This way, an exact duplicate run can be performed. #include "bp/bp.h" #include "frontend/frontend_intf.h" +#include "frontend/synthetic/kernel_params.h" #include "model.h" #include "sim.h" @@ -321,6 +322,23 @@ void get_frontend_param(const char* name, uns* variable) { FATAL_ERROR(0, "Parameter '%s' missing value --- Ignored.\n", name); } +/**************************************************************************************/ +/* get_kernel: Converts the optarg string to a number by looking it up in the + kernel_names array. The index corresponds to the entry index, which + determines the type of kernel that will be used to drive synthetic frontend. */ +void get_kernel_param(const char* name, uns* variable) { + if (optarg) { + uns ii; + + for (ii = 0; kernel_names[ii]; ii++) + if (strncmp(optarg, kernel_names[ii], MAX_STR_LENGTH) == 0) { + *variable = ii; + return; + } + FATAL_ERROR(0, "Invalid value ('%s') for parameter '%s' --- Ignored.\n", optarg, name); + } else + FATAL_ERROR(0, "Parameter '%s' missing value --- Ignored.\n", name); +} /**************************************************************************************/ /* get_dram_sched: Converts the optarg string to a number by looking it up in the dram_sched_table array. 
The index corresponds to the entry index, which @@ -823,4 +841,4 @@ static void print_help(void) { printf("%s", help); } -#undef DEF_PARAM +#undef DEF_PARAM \ No newline at end of file diff --git a/src/param_parser.h b/src/param_parser.h index 0c6ca2ff..a121bea7 100644 --- a/src/param_parser.h +++ b/src/param_parser.h @@ -48,6 +48,8 @@ void get_sim_mode_param(const char*, Generic_Enum*); void get_exit_cond_param(const char*, Generic_Enum*); void get_sim_model_param(const char*, uns*); void get_frontend_param(const char*, uns*); +void get_kernel_param(const char*, uns*); + // void get_dram_sched_param(const char *, uns *); // Ramulator_remove void get_float_param(const char*, float*); void get_int_param(const char*, int*);