Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,33 @@
cmake_minimum_required(VERSION 3.14)
project(smt LANGUAGES CXX CUDA)
project(smt)


# Find CUDA
find_package(CUDA 11)
enable_language(CUDA)
set(ENV{CUDA_PATH} ${CUDA_TOOLKIT_ROOT_DIR})
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
set(CMAKE_CUDA_ARCHETECTURES 35 50 72)

if (NOT DEFINED USEDLTO)
set(USEDLTO ON)
endif()

message("")
message("CUDA_VERSION ${CUDA_VERSION}")
message("USEDLTO ${USEDLTO}")
message("CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR}")
message("CMAKE_CUDA_COMPILER ${CMAKE_CUDA_COMPILER}")
message("CMAKE_CUDA_ARCHETECTURES ${CMAKE_CUDA_ARCHETECTURES}")
message("")

if (${CUDA_VERSION_MAJOR} EQUAL 11 AND ${USEDLTO})
set(DLTO -dlto)
endif ()

set(NVCC_FLAGS -Wno-deprecated-gpu-targets --expt-relaxed-constexpr -DJFS_RUNTIME_FAILURE_CALLS_ABORT -dc)

# SMTLIB
add_library(smtlib STATIC
SMTLIB/Core.cu
SMTLIB/Logger.cu
Expand All @@ -12,18 +37,18 @@ add_library(smtlib STATIC
SMTLIB/NativeBitVector.cu)
target_include_directories (smtlib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_features(smtlib PUBLIC cxx_std_11)
set_property(TARGET smtlib PROPERTY CUDA_ARCHITECTURES 35 50 72)
target_compile_options(smtlib PUBLIC ${NVCC_FLAGS})

add_executable(smt smt.cu smt.h theory.h theory.cu aes.cu cuda_aes.h)
# SMT
add_executable(smt smt.cu smt.h theory.h theory.cu)
target_include_directories (smt PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(smt LINK_PUBLIC smtlib)

set_property(TARGET smt PROPERTY CUDA_ARCHITECTURES 35 50 72)
set_property(TARGET smtlib PROPERTY CUDA_ARCHITECTURES 35 50 72)
set_property(TARGET smt PROPERTY CUDA_SEPARABLE_COMPILATION ON)
target_compile_options(smt PUBLIC ${NVCC_FLAGS})

target_compile_options(smt PUBLIC --expt-relaxed-constexpr -DJFS_RUNTIME_FAILURE_CALLS_ABORT -dc)
target_compile_options(smtlib PUBLIC --expt-relaxed-constexpr -DJFS_RUNTIME_FAILURE_CALLS_ABORT -dc)

# Move executable after build
add_custom_command(TARGET smt
POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ../bin/)
add_custom_command(TARGET smt
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

all:
cmake -Bbuild -GNinja .
cmake --build build -j $(shell nproc)
cmake --build build

SMTLIB/%.o: SMTLIB/%.cu
$(MAKE) -C SMTLIB
Expand All @@ -26,4 +26,4 @@ smt: theory.o smt.o aes.o $(smtlib-objs)

clean:
rm -f *.o SMTLIB/*.o smt
rm -rf build
rm -rf build bin cxx
4 changes: 2 additions & 2 deletions generate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ for filename in "$@"; do
fi
smt2cxx "${filename}" | transpose | format > theory.cu
cp theory.cu "cxx/smt-$(basename ${filename} | tr '.' '-')".cxx
make smt
mv smt "bin/smt-$(basename ${filename} | tr '.' '-')"
make
mv bin/smt "bin/smt-$(basename ${filename} | tr '.' '-')"
done
63 changes: 27 additions & 36 deletions smt.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <time.h>
#include <sys/time.h>
#include <sched.h>
#include "curand_kernel.h"
#include "SMTLIB/Float.h"
#include "SMTLIB/BufferRef.h"
#include "cuda_aes.h"
Expand Down Expand Up @@ -32,37 +33,39 @@ __host__ __device__ inline int64_t aes_pad(int64_t num) {
}

// Note: size is the *unpadded* size of the input vars
__global__ void fuzz(uint8_t *in_data, size_t size, const uint8_t *key, uint64_t *gobuf, unsigned long long *execs) {
//__global__ void fuzz(uint8_t *in_data, size_t size, const uint8_t *key, uint64_t *gobuf, unsigned long long *execs) {
__global__ void fuzz(uint8_t *in_data, size_t size, curandState *state, uint64_t *gobuf, unsigned long long *execs) {
int bindex = blockIdx.x * blockDim.x + threadIdx.x;
int64_t padded = aes_pad(size);
uint64_t offset = bindex * padded;
extern __shared__ uint8_t sdata[];
int offset = bindex * size;

// Get our local chunk
int soff = threadIdx.x * padded;
int seed = bindex*37;
curand_init(seed, bindex, 0, &state[bindex]);
curandState localState = state[bindex];

// First time initialize block to i
for (int i = 0; i < padded; i += AES_BLOCK_SIZE) {
*(uint64_t *)(sdata+soff+i) = bindex * (padded/AES_BLOCK_SIZE) + i;
}
extern __shared__ uint8_t sdata[];
int soff = threadIdx.x * size;
uint8_t *data = sdata + soff;

while (!solved) {
atomicAdd(execs, 1);
// Randomize input for our slice
for (int i = 0; i < padded; i += AES_BLOCK_SIZE) {
encrypt_one_table(sdata+soff, key, i);
uint8_t* curr = data;
//TODO: once we confirm 16bytes and we generate 8bytes, replace loop with writes
while (curr < data + size)
{
*curr++ = curand(&localState); //TODO: i think this is 8bytes but not sure, alternative is uint4
}
if (LLVMFuzzerTestOneInput(sdata+soff, size)) {

if (LLVMFuzzerTestOneInput(data, size)) {
*gobuf = bindex;
memcpy(in_data+offset, sdata+soff, size);
solved = 1;
}
// Add increment to randomize (I hope?)
for (int i = 0; i < padded; i += AES_BLOCK_SIZE) {
*(uint64_t *)(sdata+soff+i) = bindex * (padded/AES_BLOCK_SIZE) + i;
}
//for (int i = 0; i < padded; i += AES_BLOCK_SIZE) {
// *(uint64_t *)(sdata+soff+i) = bindex * (padded/AES_BLOCK_SIZE) + i;
//}
}
return;
}

void CUDART_CB finishedCB(void *data) {
Expand All @@ -76,22 +79,12 @@ void launch_kernel(int device, int varsize, uint8_t **ret_gbuf, uint64_t **ret_g
uint64_t *gobuf;
unsigned long long *gexecs;

int64_t padded = aes_pad(varsize);
printf("Padding varsize from %d to %ld\n", varsize, padded);
unsigned char ckey[AES_BLOCK_SIZE];
FILE *rng = fopen("/dev/urandom","rb");
fread(ckey, AES_BLOCK_SIZE, 1, rng);
fclose(rng);

// Pre-expand the round keys and copy to device mem
uint8_t rkey[176];
const uint8_t *drkey;
expand_key(ckey, rkey);
gpuErrchk(cudaMalloc(&drkey, 176));
gpuErrchk(cudaMemcpy((uint8_t *)drkey, rkey, sizeof(uint8_t) * 176, cudaMemcpyHostToDevice));
int size = varsize; // i think?
curandState *rngStates;
gpuErrchk(cudaMalloc(&rngStates, N*M*sizeof(curandState)));

// Alloc GPU buffers
gpuErrchk(cudaMalloc(&gbuf, padded*N*M));
gpuErrchk(cudaMalloc(&gbuf, size*N*M));
gpuErrchk(cudaMalloc(&gobuf, sizeof(uint64_t)));
gpuErrchk(cudaMalloc(&gexecs, sizeof(unsigned long long)));

Expand All @@ -105,8 +98,7 @@ void launch_kernel(int device, int varsize, uint8_t **ret_gbuf, uint64_t **ret_g
int *dev = (int *)malloc(sizeof(int));
*dev = device + 1;
printf("Launching kernel on GPU%d...\n", device);
fuzz<<<M,N,N*padded,stream>>>(gbuf, varsize, drkey, gobuf, gexecs);
gpuErrchk(cudaPeekAtLastError());
fuzz<<<M,N,N*size,stream>>>(gbuf, varsize, rngStates, gobuf, gexecs);
gpuErrchk(cudaLaunchHostFunc(stream, finishedCB, dev));
}

Expand Down Expand Up @@ -143,11 +135,10 @@ int main(int argc, char **argv) {


// Get and print output
int64_t padded = aes_pad(varsize);
uint8_t *buf = (uint8_t *)malloc(padded);
uint8_t *buf = (uint8_t*)malloc(varsize);
uint64_t oindex;
gpuErrchk(cudaMemcpy(&oindex, gobuf[i], sizeof(uint64_t), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(buf, gbuf[i]+(oindex*padded), padded, cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(buf, gbuf[i]+(oindex*varsize), varsize, cudaMemcpyDeviceToHost));
printf("Found a satisfying assignment on device %d thread %lu:\n", i, oindex);
for (int k = 0; k < varsize; k++) printf("%02x", buf[k]); printf("\n");
}