forked from Xiaoyang-Lu/ChampSim_CAMAT
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit e3189e8
Showing
99 changed files
with
12,616 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
app = champsim | ||
|
||
srcExt = cc | ||
srcDir = src branch replacement prefetcher | ||
objDir = obj | ||
binDir = bin | ||
inc = inc | ||
|
||
debug = 1 | ||
|
||
CFlags = -Wall -O3 -std=c++11 -D_DEFAULT_SOURCE | ||
LDFlags = | ||
libs = | ||
libDir = | ||
|
||
|
||
#************************ DO NOT EDIT BELOW THIS LINE! ************************ | ||
|
||
ifeq ($(debug),1) | ||
debug=-g | ||
else | ||
debug= | ||
endif | ||
inc := $(addprefix -I,$(inc)) | ||
libs := $(addprefix -l,$(libs)) | ||
libDir := $(addprefix -L,$(libDir)) | ||
CFlags += -c $(debug) $(inc) $(libDir) $(libs) | ||
# Every source file found under the configured source directories.
sources := $(shell find $(srcDir) -name '*.$(srcExt)')
# Directories that hold those sources; make-repo mirrors each one under $(objDir).
# Derived from $(sources) so only the configured source directories are mirrored
# (the previous `find .` walked the whole tree), and $(sort) removes duplicates
# no matter what order find reports them in (plain `uniq` only drops adjacent ones).
srcDirs := $(sort $(dir $(sources)))
# Object path for each source: $(objDir)/<source path with a .o extension>.
objects := $(patsubst %.$(srcExt),$(objDir)/%.o,$(sources))
|
||
ifeq ($(srcExt),cc) | ||
CC = $(CXX) | ||
else | ||
CFlags += -std=gnu99 | ||
endif | ||
|
||
# Special target names are case-sensitive: a lowercase ".phony" is just an
# ordinary target and marks nothing as phony. buildrepo never produces a file
# of that name either, so it is declared phony as well.
.PHONY: all clean distclean buildrepo
|
||
|
||
all: $(binDir)/$(app) | ||
|
||
# Link every object into the final binary, creating the output directory first.
# $(libDir) and $(libs) already carry their -L / -l prefixes; they must be on
# the link line (after the objects) or any configured library is never linked.
$(binDir)/$(app): buildrepo $(objects)
	@mkdir -p `dirname $@`
	@echo "Linking $@..."
	@$(CC) $(objects) $(LDFlags) $(libDir) $(libs) -o $@
|
||
# Compile one source file into its object, first writing a .d dependency file
# next to it. $(@:.o=.d) only rewrites the trailing suffix, unlike $(subst),
# which would also rewrite a ".o" appearing earlier in the path.
$(objDir)/%.o: %.$(srcExt)
	@echo "Generating dependencies for $<..."
	@$(call make-depend,$<,$@,$(@:.o=.d))
	@echo "Compiling $<..."
	@$(CC) $(CFlags) $< -o $@

# Pull in the generated dependency files so that editing a header rebuilds the
# objects that include it. The leading '-' ignores files that do not exist yet
# (first build, or after `make clean`).
-include $(objects:.o=.d)
|
||
clean: | ||
$(RM) -r $(objDir) | ||
|
||
distclean: clean | ||
$(RM) -r $(binDir)/$(app) | ||
|
||
buildrepo: | ||
@$(call make-repo) | ||
|
||
define make-repo | ||
for dir in $(srcDirs); \ | ||
do \ | ||
mkdir -p $(objDir)/$$dir; \ | ||
done | ||
endef | ||
|
||
|
||
# usage: $(call make-depend,source-file,object-file,depend-file) | ||
define make-depend | ||
$(CC) -MM \ | ||
-MF $3 \ | ||
-MP \ | ||
-MT $2 \ | ||
$(CFlags) \ | ||
$1 | ||
endef |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# ChampSim | ||
ChampSim is a trace-based simulator for a microarchitecture study. | ||
|
||
We integrate ChampSim with C-AMAT parameters to analyze cache performance. | ||
|
||
The C-AMAT model can be applied to multicore processors in two ways, single-core measurement, and multicore (single processor) measurement. For the former, we are interested in a single core’s, say core A’s, performance. For the latter, we are interested in the overall multicore performance as a single multicore processor. By the definition of C-AMAT, core A’s C-AMAT can be measured by core A’s number of memory accesses and core A’s memory active cycles. Likewise, the multicore C-AMAT can be measured by the multicore processor’s overall number of accesses and the multicore’s memory active cycles. | ||
|
||
In a shared LLC, core A’s memory active cycles can be collected by "_active_cycles_per_core"; the multicore’s memory active cycles can be collected by "_active_cycles". | ||
|
||
# How To | ||
Please read the README.old in detail to know more. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
<p align="center"> | ||
<h1 align="center"> ChampSim </h1> | ||
<p> ChampSim is a trace-based simulator for a microarchitecture study. You can sign up to the public mailing list by sending an empty mail to [email protected]. Traces for the 3rd Data Prefetching Championship (DPC-3) can be found from here (https://dpc3.compas.cs.stonybrook.edu/?SW_IS). A set of traces used for the 2nd Cache Replacement Championship (CRC-2) can be found from this link. (http://bit.ly/2t2nkUj) <p> | ||
</p> | ||
|
||
# Clone ChampSim repository | ||
``` | ||
git clone https://github.com/ChampSim/ChampSim.git | ||
``` | ||
|
||
# Compile | ||
|
||
ChampSim takes six parameters: branch predictor, L1D prefetcher, L2C prefetcher, LLC prefetcher, LLC replacement policy, and the number of cores. | ||
For example, `./build_champsim.sh bimodal no no no lru 1` builds a single-core processor with a bimodal branch predictor, no L1D/L2C/LLC data prefetchers, and the baseline LRU replacement policy for the LLC. | ||
``` | ||
$ ./build_champsim.sh bimodal no no no lru 1 | ||
|
||
$ ./build_champsim.sh ${BRANCH} ${L1D_PREFETCHER} ${L2C_PREFETCHER} ${LLC_PREFETCHER} ${LLC_REPLACEMENT} ${NUM_CORE} | ||
``` | ||
|
||
# Download DPC-3 trace | ||
|
||
Professor Daniel Jimenez at Texas A&M University kindly provided traces for DPC-3. Use the following script to download these traces (~20GB size and max simpoint only). | ||
``` | ||
$ cd scripts | ||
|
||
$ ./download_dpc3_traces.sh | ||
``` | ||
|
||
# Run simulation | ||
|
||
Execute `run_champsim.sh` with proper input arguments. The default `TRACE_DIR` in `run_champsim.sh` is set to `$PWD/dpc3_traces`. <br> | ||
|
||
* Single-core simulation: Run simulation with `run_champsim.sh` script. | ||
|
||
``` | ||
Usage: ./run_champsim.sh [BINARY] [N_WARM] [N_SIM] [TRACE] [OPTION] | ||
$ ./run_champsim.sh bimodal-no-no-no-lru-1core 1 10 400.perlbench-41B.champsimtrace.xz | ||
|
||
${BINARY}: ChampSim binary compiled by "build_champsim.sh" (bimodal-no-no-no-lru-1core) | ||
${N_WARM}: number of instructions for warmup (1 million) | ||
${N_SIM}: number of instructions for detailed simulation (10 million) | ||
${TRACE}: trace name (400.perlbench-41B.champsimtrace.xz) | ||
${OPTION}: extra option for "-low_bandwidth" (src/main.cc) | ||
``` | ||
Simulation results will be stored under "results_${N_SIM}M" as a form of "${TRACE}-${BINARY}-${OPTION}.txt".<br> | ||
|
||
* Multi-core simulation: Run simulation with `run_4core.sh` script. <br> | ||
``` | ||
Usage: ./run_4core.sh [BINARY] [N_WARM] [N_SIM] [N_MIX] [TRACE0] [TRACE1] [TRACE2] [TRACE3] [OPTION] | ||
$ ./run_4core.sh bimodal-no-no-no-lru-4core 1 10 0 400.perlbench-41B.champsimtrace.xz \\ | ||
401.bzip2-38B.champsimtrace.xz 403.gcc-17B.champsimtrace.xz 410.bwaves-945B.champsimtrace.xz | ||
``` | ||
Note that we need to specify multiple trace files for `run_4core.sh`. `N_MIX` is used to represent a unique ID for mixed multi-programmed workloads. | ||
|
||
|
||
# Add your own branch predictor, data prefetchers, and replacement policy | ||
**Copy an empty template** | ||
``` | ||
$ cp branch/branch_predictor.cc branch/mybranch.bpred | ||
$ cp prefetcher/l1d_prefetcher.cc prefetcher/mypref.l1d_pref | ||
$ cp prefetcher/l2c_prefetcher.cc prefetcher/mypref.l2c_pref | ||
$ cp prefetcher/llc_prefetcher.cc prefetcher/mypref.llc_pref | ||
$ cp replacement/llc_replacement.cc replacement/myrepl.llc_repl | ||
``` | ||
|
||
**Work on your algorithms with your favorite text editor** | ||
``` | ||
$ vim branch/mybranch.bpred | ||
$ vim prefetcher/mypref.l1d_pref | ||
$ vim prefetcher/mypref.l2c_pref | ||
$ vim prefetcher/mypref.llc_pref | ||
$ vim replacement/myrepl.llc_repl | ||
``` | ||
|
||
**Compile and test** | ||
``` | ||
$ ./build_champsim.sh mybranch mypref mypref mypref myrepl 1 | ||
$ ./run_champsim.sh mybranch-mypref-mypref-mypref-myrepl-1core 1 10 bzip2_183B | ||
``` | ||
|
||
# How to create traces | ||
|
||
We have included only 4 sample traces, taken from SPEC CPU 2006. These | ||
traces are short (10 million instructions), and do not necessarily cover the range of behaviors your | ||
replacement algorithm will likely see in the full competition trace list (not | ||
included). We STRONGLY recommend creating your own traces, covering | ||
a wide variety of program types and behaviors. | ||
|
||
The included Pin Tool champsim_tracer.cpp can be used to generate new traces. | ||
We used Pin 3.2 (pin-3.2-81205-gcc-linux), and it may require | ||
installing libdwarf.so, libelf.so, or other libraries, if you do not already | ||
have them. Please refer to the Pin documentation (https://software.intel.com/sites/landingpage/pintool/docs/81205/Pin/html/) | ||
for working with Pin 3.2. | ||
|
||
Get this version of Pin: | ||
``` | ||
wget http://software.intel.com/sites/landingpage/pintool/downloads/pin-3.2-81205-gcc-linux.tar.gz | ||
``` | ||
|
||
**Use the Pin tool like this** | ||
``` | ||
pin -t obj-intel64/champsim_tracer.so -- <your program here> | ||
``` | ||
|
||
The tracer has three options you can set: | ||
``` | ||
-o | ||
Specify the output file for your trace. | ||
The default is default_trace.champsim | ||
|
||
-s <number> | ||
Specify the number of instructions to skip in the program before tracing begins. | ||
The default value is 0. | ||
|
||
-t <number> | ||
The number of instructions to trace, after -s instructions have been skipped. | ||
The default value is 1,000,000. | ||
``` | ||
For example, you could trace 200,000 instructions of the program ls, after | ||
skipping the first 100,000 instructions, with this command: | ||
``` | ||
pin -t obj/champsim_tracer.so -o traces/ls_trace.champsim -s 100000 -t 200000 -- ls | ||
``` | ||
Traces created with the champsim_tracer.so are approximately 64 bytes per instruction, | ||
but they generally compress down to less than a byte per instruction using xz compression. | ||
|
||
# Evaluate Simulation | ||
|
||
ChampSim measures the IPC (Instructions Per Cycle) value as a performance metric. <br> | ||
There are some other useful metrics printed out at the end of simulation. <br> | ||
|
||
Good luck and be a champion! <br> |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#include "ooo_cpu.h" | ||
|
||
#define BIMODAL_TABLE_SIZE 16384 | ||
#define BIMODAL_PRIME 16381 | ||
#define MAX_COUNTER 3 | ||
int bimodal_table[NUM_CPUS][BIMODAL_TABLE_SIZE]; | ||
|
||
void O3_CPU::initialize_branch_predictor() | ||
{ | ||
cout << "CPU " << cpu << " Bimodal branch predictor" << endl; | ||
|
||
for(int i = 0; i < BIMODAL_TABLE_SIZE; i++) | ||
bimodal_table[cpu][i] = 0; | ||
} | ||
|
||
uint8_t O3_CPU::predict_branch(uint64_t ip) | ||
{ | ||
uint32_t hash = ip % BIMODAL_PRIME; | ||
uint8_t prediction = (bimodal_table[cpu][hash] >= ((MAX_COUNTER + 1)/2)) ? 1 : 0; | ||
|
||
return prediction; | ||
} | ||
|
||
void O3_CPU::last_branch_result(uint64_t ip, uint8_t taken) | ||
{ | ||
uint32_t hash = ip % BIMODAL_PRIME; | ||
|
||
if (taken && (bimodal_table[cpu][hash] < MAX_COUNTER)) | ||
bimodal_table[cpu][hash]++; | ||
else if ((taken == 0) && (bimodal_table[cpu][hash] > 0)) | ||
bimodal_table[cpu][hash]--; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#include "ooo_cpu.h" | ||
|
||
#define BIMODAL_TABLE_SIZE 16384 | ||
#define BIMODAL_PRIME 16381 | ||
#define MAX_COUNTER 3 | ||
int bimodal_table[NUM_CPUS][BIMODAL_TABLE_SIZE]; | ||
|
||
void O3_CPU::initialize_branch_predictor() | ||
{ | ||
cout << "CPU " << cpu << " Bimodal branch predictor" << endl; | ||
|
||
for(int i = 0; i < BIMODAL_TABLE_SIZE; i++) | ||
bimodal_table[cpu][i] = 0; | ||
} | ||
|
||
uint8_t O3_CPU::predict_branch(uint64_t ip) | ||
{ | ||
uint32_t hash = ip % BIMODAL_PRIME; | ||
uint8_t prediction = (bimodal_table[cpu][hash] >= ((MAX_COUNTER + 1)/2)) ? 1 : 0; | ||
|
||
return prediction; | ||
} | ||
|
||
void O3_CPU::last_branch_result(uint64_t ip, uint8_t taken) | ||
{ | ||
uint32_t hash = ip % BIMODAL_PRIME; | ||
|
||
if (taken && (bimodal_table[cpu][hash] < MAX_COUNTER)) | ||
bimodal_table[cpu][hash]++; | ||
else if ((taken == 0) && (bimodal_table[cpu][hash] > 0)) | ||
bimodal_table[cpu][hash]--; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#include "ooo_cpu.h" | ||
|
||
// Number of branch outcomes kept in the global history register.
#define GLOBAL_HISTORY_LENGTH 14
// Bit mask that keeps the low GLOBAL_HISTORY_LENGTH bits. The whole expression
// is parenthesized so the macro expands as a single value next to any operator
// (e.g. `2 * GLOBAL_HISTORY_MASK` or `~GLOBAL_HISTORY_MASK`).
#define GLOBAL_HISTORY_MASK ((1 << GLOBAL_HISTORY_LENGTH) - 1)
int branch_history_vector[NUM_CPUS]; | ||
|
||
#define GS_HISTORY_TABLE_SIZE 16384 | ||
int gs_history_table[NUM_CPUS][GS_HISTORY_TABLE_SIZE]; | ||
int my_last_prediction[NUM_CPUS]; | ||
|
||
void O3_CPU::initialize_branch_predictor() | ||
{ | ||
cout << "CPU " << cpu << " GSHARE branch predictor" << endl; | ||
|
||
branch_history_vector[cpu] = 0; | ||
my_last_prediction[cpu] = 0; | ||
|
||
for(int i=0; i<GS_HISTORY_TABLE_SIZE; i++) | ||
gs_history_table[cpu][i] = 2; // 2 is slightly taken | ||
} | ||
|
||
unsigned int gs_table_hash(uint64_t ip, int bh_vector) | ||
{ | ||
unsigned int hash = ip^(ip>>GLOBAL_HISTORY_LENGTH)^(ip>>(GLOBAL_HISTORY_LENGTH*2))^bh_vector; | ||
hash = hash%GS_HISTORY_TABLE_SIZE; | ||
|
||
//printf("%d\n", hash); | ||
|
||
return hash; | ||
} | ||
|
||
uint8_t O3_CPU::predict_branch(uint64_t ip) | ||
{ | ||
int prediction = 1; | ||
|
||
int gs_hash = gs_table_hash(ip, branch_history_vector[cpu]); | ||
|
||
if(gs_history_table[cpu][gs_hash] >= 2) | ||
prediction = 1; | ||
else | ||
prediction = 0; | ||
|
||
my_last_prediction[cpu] = prediction; | ||
|
||
return prediction; | ||
} | ||
|
||
void O3_CPU::last_branch_result(uint64_t ip, uint8_t taken) | ||
{ | ||
int gs_hash = gs_table_hash(ip, branch_history_vector[cpu]); | ||
|
||
if(taken == 1) { | ||
if(gs_history_table[cpu][gs_hash] < 3) | ||
gs_history_table[cpu][gs_hash]++; | ||
} else { | ||
if(gs_history_table[cpu][gs_hash] > 0) | ||
gs_history_table[cpu][gs_hash]--; | ||
} | ||
|
||
// update branch history vector | ||
branch_history_vector[cpu] <<= 1; | ||
branch_history_vector[cpu] &= GLOBAL_HISTORY_MASK; | ||
branch_history_vector[cpu] |= taken; | ||
} |
Oops, something went wrong.