Skip to content

Commit

Permalink
Merge branch 'dev' into sim-name
Browse files Browse the repository at this point in the history
  • Loading branch information
tgrogers authored Jan 29, 2024
2 parents e21baf5 + a1ca547 commit 3d40732
Show file tree
Hide file tree
Showing 14 changed files with 426 additions and 71 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/short-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# This is a basic workflow to help you get started with Actions

name: CI

# Controls when the workflow will run
on:
# Triggers the workflow on push or pull request events but only for the mydev branch
push:
pull_request:

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
short-tests:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
# env:

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2
- name: Run Simulation
run: /bin/bash $GITHUB_WORKSPACE/short-tests.sh
4 changes: 2 additions & 2 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pipeline {
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-SASS -T ~/../common/accel-sim/traces/volta-tesla-v100/latest/ -N sass-short-${BUILD_NUMBER}
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C RTX2060-SASS -T ~/../common/accel-sim/traces/turing-rtx2060/latest/ -N sass-short-${BUILD_NUMBER}
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C RTX3070-SASS -T ~/../common/accel-sim/traces/ampere-rtx3070/latest/ -N sass-short-${BUILD_NUMBER}
./util/job_launching/monitor_func_test.py -I -v -s stats-per-app-sass.csv -N sass-short-${BUILD_NUMBER}'''
./util/job_launching/monitor_func_test.py -v -s stats-per-app-sass.csv -N sass-short-${BUILD_NUMBER}'''
}, "ptx": {
sh '''#!/bin/bash -xe
source ./env-setup/11.2.1_env_setup.sh
Expand All @@ -46,7 +46,7 @@ pipeline {
./gpu-app-collection/get_regression_data.sh
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX -N short-ptx-${BUILD_NUMBER}
./util/job_launching/monitor_func_test.py -I -v -s stats-per-app-ptx.csv -N short-ptx-${BUILD_NUMBER}'''
./util/job_launching/monitor_func_test.py -v -s stats-per-app-ptx.csv -N short-ptx-${BUILD_NUMBER}'''
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion gpu-simulator/ISA_Def/ampere_opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ static const std::unordered_map<std::string, OpcodeChar> Ampere_OpcodeMap = {
{"CCTLT", OpcodeChar(OP_CCTLT, ALU_OP)},

{"LDGDEPBAR", OpcodeChar(OP_LDGDEPBAR, ALU_OP)},
{"LDGSTS", OpcodeChar(OP_LDGSTS, LOAD_OP)},
{"LDGSTS", OpcodeChar(OP_LDGSTS, LOAD_OP)},

// Uniform Datapath Instruction
// UDP unit
Expand Down
24 changes: 24 additions & 0 deletions gpu-simulator/trace-driven/trace_driven.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,10 @@ bool trace_warp_inst_t::parse_from_trace_struct(
cache_op = CACHE_ALL;
break;
case OP_LDG:
// LDGSTS is loading the values needed directly from the global memory to shared memory.
// Before this feature, the values need to be loaded to registers first, then store to
// the shared memory.
case OP_LDGSTS: // Add for memcpy_async
case OP_LDL:
assert(data_size > 0);
memory_op = memory_load;
Expand All @@ -267,6 +271,9 @@ bool trace_warp_inst_t::parse_from_trace_struct(
space.set_type(local_space);
else
space.set_type(global_space);
// Add for LDGSTS instruction
if (m_opcode == OP_LDGSTS)
m_is_ldgsts = true;
// check the cache scope, if its strong GPU, then bypass L1
if (trace.check_opcode_contain(opcode_tokens, "STRONG") &&
trace.check_opcode_contain(opcode_tokens, "GPU")) {
Expand Down Expand Up @@ -362,6 +369,23 @@ bool trace_warp_inst_t::parse_from_trace_struct(
// barrier_type bar_type;
// reduction_type red_type;
break;
// LDGDEPBAR is to form a group containing the previous LDGSTS instructions that
// have not been grouped yet.
// In the implementation, a group number will be assigned once the instruction is
// met.
case OP_LDGDEPBAR:
m_is_ldgdepbar = true;
break;
// DEPBAR is served as a warp-wise barrier that is only effective for LDGSTS
// instructions. It is associated with a immediate value. The immediate value
// indicates the last N LDGDEPBAR groups to not wait once the instruction is met.
// For example, if the immediate value is 1, then the last group is able to proceed
// even with DEPBAR present; if the immediate value is 0, then all of the groups
// need to finish before proceed.
case OP_DEPBAR:
m_is_depbar = true;
m_depbar_group_no = trace.imm;
break;
case OP_HADD2:
case OP_HADD2_32I:
case OP_HFMA2:
Expand Down
79 changes: 68 additions & 11 deletions gpu-simulator/trace-parser/trace_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#include <string>
#include <vector>

#include <errno.h>
#include <unistd.h>
#include <signal.h>

#include "trace_parser.h"

bool is_number(const std::string &s) {
Expand All @@ -28,7 +32,7 @@ void split(const std::string &str, std::vector<std::string> &cont,
}
}

inst_trace_t::inst_trace_t() { memadd_info = NULL; }
inst_trace_t::inst_trace_t() { memadd_info = NULL; imm = 0;}

inst_trace_t::~inst_trace_t() {
if (memadd_info != NULL) delete memadd_info;
Expand Down Expand Up @@ -165,7 +169,7 @@ bool inst_trace_t::parse_from_string(std::string trace, unsigned trace_version,
ss >> temp;
sscanf(temp.c_str(), "R%d", &reg_src[i]);
}

// parse mem info
unsigned address_mode = 0;
unsigned mem_width = 0;
Expand Down Expand Up @@ -211,6 +215,9 @@ bool inst_trace_t::parse_from_string(std::string trace, unsigned trace_version,
memadd_info->base_delta_decompress(base_address, deltas, mask_bits);
}
}

ss >> imm;

// Finish Parsing

return true;
Expand Down Expand Up @@ -277,19 +284,65 @@ kernel_trace_t *trace_parser::parse_kernel_info(
const std::string &kerneltraces_filepath) {
kernel_trace_t *kernel_info = new kernel_trace_t;
kernel_info->enable_lineinfo = 0; // default disabled
kernel_info->ifs = new std::ifstream;
std::ifstream *ifs = kernel_info->ifs;
ifs->open(kerneltraces_filepath.c_str());

if (!ifs->is_open()) {
std::cout << "Unable to open file: " << kerneltraces_filepath << std::endl;
std::string read_trace_cmd;
int _l = kerneltraces_filepath.length();
if(_l > 3 && kerneltraces_filepath.substr(_l-3, 3) == ".xz"){
// this is xz-compressed trace
read_trace_cmd = "xz -dc " + kerneltraces_filepath;
} else if(_l > 7 && kerneltraces_filepath.substr(_l-7, 7) == ".traceg"){
// this is plain text trace
read_trace_cmd ="cat " + kerneltraces_filepath;
} else {
std::cerr << "Can't read trace. Only .xz and plain text are supported: "
<< kerneltraces_filepath <<"\n";
exit(1);
}

// Create an interprocess channel, and fork out a data source process. The
// data source process reads trace from disk, write to the channel, and the
// simulator process read from the channel.
int *pipefd = kernel_info->pipefd;
if(pipe(pipefd) != 0){
std::cerr << "Failed to create interprocess channel\n";
perror("pipe");
exit(1);
}

pid_t pid = fork();
if(pid == 0){
// The child process is the data source. Redirect its
// stdout to the write end of the pipe.
close(pipefd[0]);
dup2(pipefd[1], STDOUT_FILENO);

// When using GDB, sending Ctrl+C to the simulator will send a SIGINT signal
// to the child process as well, subsequently causing it to terminate. To
// avoid this, we let the child process ignore (SIG_IGN) the SIGINT signal.
// Reference:
// https://stackoverflow.com/questions/38404925/gdb-interrupt-running-process-without-killing-child-processes
signal(SIGINT, SIG_IGN);

execle("/bin/sh", "sh", "-c", read_trace_cmd.c_str(), NULL, environ);
perror("execle"); // the child process shouldn't reach here if all is well.
exit(1);
} else {
// parent (simulator)
close(pipefd[1]);
dup2(pipefd[0], STDIN_FILENO);
}

// Parent continues from here.
kernel_info->ifs = &std::cin;
std::istream *ifs = kernel_info->ifs;

std::cout << "Processing kernel " << kerneltraces_filepath << std::endl;

std::string line;

// Important to clear the istream. Otherwise, the eofbit from the last
// kernel may be carried over to this kernel
ifs->clear();
while (!ifs->eof()) {
getline(*ifs, line);

Expand Down Expand Up @@ -359,15 +412,19 @@ kernel_trace_t *trace_parser::parse_kernel_info(

void trace_parser::kernel_finalizer(kernel_trace_t *trace_info) {
assert(trace_info);
assert(trace_info->ifs);
if (trace_info->ifs->is_open()) trace_info->ifs->close();
delete trace_info->ifs;

// The pipe read/write end file descriptors held by the child process would
// have been automatically closed when it terminated. But the parent
// process may read an arbitrary amount of trace files, so it has to close
// all file descriptors.
close(trace_info->pipefd[0]);
close(trace_info->pipefd[1]);
delete trace_info;
}

void trace_parser::get_next_threadblock_traces(
std::vector<std::vector<inst_trace_t> *> threadblock_traces,
unsigned trace_version, unsigned enable_lineinfo, std::ifstream *ifs) {
unsigned trace_version, unsigned enable_lineinfo, std::istream *ifs) {
for (unsigned i = 0; i < threadblock_traces.size(); ++i) {
threadblock_traces[i]->clear();
}
Expand Down
9 changes: 7 additions & 2 deletions gpu-simulator/trace-parser/trace_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ struct inst_trace_t {
std::string opcode;
unsigned reg_srcs_num;
unsigned reg_src[MAX_SRC];
uint64_t imm;

inst_memadd_info_t *memadd_info;

bool parse_from_string(std::string trace, unsigned tracer_version,
Expand Down Expand Up @@ -94,7 +96,10 @@ struct kernel_trace_t {
unsigned long long shmem_base_addr;
unsigned long long local_base_addr;
// Reference to open filestream
std::ifstream *ifs;
std::istream *ifs;
// Anonymous pipe through which the trace is transmitted from a trace reader
// process to the simulator process
int pipefd[2]={};
};

class trace_parser {
Expand All @@ -110,7 +115,7 @@ class trace_parser {

void get_next_threadblock_traces(
std::vector<std::vector<inst_trace_t> *> threadblock_traces,
unsigned trace_version, unsigned enable_lineinfo, std::ifstream *ifs);
unsigned trace_version, unsigned enable_lineinfo, std::istream *ifs);

void kernel_finalizer(kernel_trace_t *trace_info);

Expand Down
2 changes: 1 addition & 1 deletion travis.sh → short-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ fi
#Make the simulator
export PATH=$CUDA_INSTALL_PATH/bin:$PATH;
source ./gpu-simulator/setup_environment.sh
make -C ./gpu-simulator
make -j -C ./gpu-simulator

#Get the pre-run trace files
rm -rf ./hw_run/rodinia_2.0-ft
Expand Down
4 changes: 0 additions & 4 deletions util/job_launching/configs/define-standard-cfgs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ QV100:
QV100_64SM:
base_file: "$GPGPUSIM_ROOT/configs/tested-cfgs/SM7_QV100_SMs/gpgpusim.config"


QV100_SASS:
base_file: "$GPGPUSIM_ROOT/configs/tested-cfgs/SM7_QV100_SASS/gpgpusim.config"

QV100_old:
base_file: "$GPGPUSIM_ROOT/configs/tested-cfgs/SM7_QV100_old/gpgpusim.config"
GV100:
Expand Down
8 changes: 6 additions & 2 deletions util/job_launching/run_simulations.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,13 @@ def run(self, build_handle, benchmarks, run_directory, cuda_version, simdir):
"a",
)
print(
"%s %6s %-22s %-100s %-25s %s.%s"
"%s %6s %-22s %-100s %-25s %s"
% (
time_string,
torque_out,
benchmark,
self.benchmark_args_subdirs[args],
self.run_subdir,
benchmark,
build_handle,
),
file=logfile,
Expand Down Expand Up @@ -309,6 +308,11 @@ def text_replace_torque_sim(
f.close()

if options.trace_dir == "":
# If the config contains "SASS" and you have not specified the trace directory, then likely something is wrong
if ("SASS" in self.run_subdir):
print("You are trying to run a configuration with SASS in it, but have not specified a trace directory."+\
" If you want to run SASS traces, please specify -T to point to the top-level trace directory")
exit(1)
exec_name = (
options.benchmark_exec_prefix
+ " "
Expand Down
2 changes: 1 addition & 1 deletion util/tracer_nvbit/tracer_tool/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ typedef struct {
int32_t width;
uint32_t active_mask;
uint32_t predicate_mask;

uint64_t imm;
} inst_trace_t;
2 changes: 2 additions & 0 deletions util/tracer_nvbit/tracer_tool/inject_funcs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ extern "C" __device__ __noinline__ void instrument_inst(
int pred, int opcode_id, int32_t vpc, bool is_mem, uint64_t addr,
int32_t width, int32_t desReg, int32_t srcReg1, int32_t srcReg2,
int32_t srcReg3, int32_t srcReg4, int32_t srcReg5, int32_t srcNum,
uint64_t immediate,
uint64_t pchannel_dev, uint64_t ptotal_dynamic_instr_counter,
uint64_t preported_dynamic_instr_counter, uint64_t pstop_report, uint32_t line_num) {

Expand Down Expand Up @@ -68,6 +69,7 @@ extern "C" __device__ __noinline__ void instrument_inst(
ma.GPRSrcs[3] = srcReg4;
ma.GPRSrcs[4] = srcReg5;
ma.numSrcs = srcNum;
ma.imm = immediate;
ma.active_mask = active_mask;
ma.predicate_mask = predicate_mask;
ma.sm_id = get_smid();
Expand Down
Loading

0 comments on commit 3d40732

Please sign in to comment.