diff --git a/Makefile b/Makefile
index 4468d0d0..cc5032a6 100644
--- a/Makefile
+++ b/Makefile
@@ -49,7 +49,7 @@ $(eval $(require-config))
 OUT ?= build
 BIN := $(OUT)/rv32emu
 
-CFLAGS = -std=gnu99 $(KCONFIG_CFLAGS) -Wall -Wextra -Werror
+CFLAGS = -std=gnu11 $(KCONFIG_CFLAGS) -Wall -Wextra -Werror
 CFLAGS += -Wno-unused-label -include src/common.h -Isrc/ $(CFLAGS_NO_CET)
 LDFLAGS += $(KCONFIG_LDFLAGS)
 OBJS_EXT :=
diff --git a/src/common.h b/src/common.h
index 6036698c..6ccaa6ea 100644
--- a/src/common.h
+++ b/src/common.h
@@ -218,6 +218,98 @@ static inline uint8_t ilog2(uint32_t x)
 #define HAVE_MMAP 1
 #endif
 
+/* Portable atomic operations
+ *
+ * Prefer the GNU __atomic builtins (GCC 4.7+, Clang 3.1+), as they work with
+ * non-_Atomic types, which is required for our existing struct definitions.
+ * C11 stdatomic requires the _Atomic type qualifier on all atomic variables.
+ *
+ * Memory ordering constants:
+ *   ATOMIC_RELAXED - no synchronization; only atomicity is guaranteed
+ *   ATOMIC_ACQUIRE - later reads/writes cannot be reordered before this load
+ *   ATOMIC_RELEASE - earlier reads/writes cannot be reordered after this store
+ *   ATOMIC_SEQ_CST - full sequential consistency (strongest)
+ */
+#if defined(__GNUC__) || defined(__clang__)
+/* GNU __atomic builtins (GCC 4.7+, Clang 3.1+) */
+#define HAVE_C11_ATOMICS 0
+
+#define ATOMIC_RELAXED __ATOMIC_RELAXED
+#define ATOMIC_ACQUIRE __ATOMIC_ACQUIRE
+#define ATOMIC_RELEASE __ATOMIC_RELEASE
+#define ATOMIC_SEQ_CST __ATOMIC_SEQ_CST
+
+#define ATOMIC_LOAD(ptr, order) __atomic_load_n(ptr, order)
+#define ATOMIC_STORE(ptr, val, order) __atomic_store_n(ptr, val, order)
+#define ATOMIC_FETCH_ADD(ptr, val, order) __atomic_fetch_add(ptr, val, order)
+#define ATOMIC_FETCH_SUB(ptr, val, order) __atomic_fetch_sub(ptr, val, order)
+#define ATOMIC_EXCHANGE(ptr, val, order) __atomic_exchange_n(ptr, val, order)
+#define ATOMIC_COMPARE_EXCHANGE_WEAK(ptr, expected, desired, succ, fail) \
+    __atomic_compare_exchange_n(ptr, expected, desired, 1, succ, fail)
+
+#elif !defined(__EMSCRIPTEN__) && defined(__STDC_VERSION__) && \
+    (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
+/* C11 atomics fallback - requires the GNU __typeof__ extension for type
+ * inference.
+ * Note: the cast to (_Atomic T *) is technically undefined behavior per C11,
+ * but it is a widely used idiom that works correctly on GCC/Clang.
+ */
+#include <stdatomic.h>
+#define HAVE_C11_ATOMICS 1
+
+#define ATOMIC_RELAXED memory_order_relaxed
+#define ATOMIC_ACQUIRE memory_order_acquire
+#define ATOMIC_RELEASE memory_order_release
+#define ATOMIC_SEQ_CST memory_order_seq_cst
+
+#define ATOMIC_LOAD(ptr, order) \
+    atomic_load_explicit((_Atomic __typeof__(*(ptr)) *) (ptr), order)
+#define ATOMIC_STORE(ptr, val, order) \
+    atomic_store_explicit((_Atomic __typeof__(*(ptr)) *) (ptr), val, order)
+#define ATOMIC_FETCH_ADD(ptr, val, order) \
+    atomic_fetch_add_explicit((_Atomic __typeof__(*(ptr)) *) (ptr), val, order)
+#define ATOMIC_FETCH_SUB(ptr, val, order) \
+    atomic_fetch_sub_explicit((_Atomic __typeof__(*(ptr)) *) (ptr), val, order)
+#define ATOMIC_EXCHANGE(ptr, val, order) \
+    atomic_exchange_explicit((_Atomic __typeof__(*(ptr)) *) (ptr), val, order)
+#define ATOMIC_COMPARE_EXCHANGE_WEAK(ptr, expected, desired, succ, fail) \
+    atomic_compare_exchange_weak_explicit( \
+        (_Atomic __typeof__(*(ptr)) *) (ptr), expected, desired, succ, fail)
+
+#else
+/* No atomic support - single-threaded fallback (T2C requires atomics) */
+#define HAVE_C11_ATOMICS 0
+
+#if defined(_MSC_VER)
+#pragma message("No atomic operations available. T2C JIT will be disabled.")
+#else
+#warning "No atomic operations available. T2C JIT will be disabled."
+#endif
+
+#define ATOMIC_RELAXED 0
+#define ATOMIC_ACQUIRE 0
+#define ATOMIC_RELEASE 0
+#define ATOMIC_SEQ_CST 0
+
+/* Simple non-atomic fallback - only safe for single-threaded use.
+ * WARNING: ATOMIC_EXCHANGE returns the new value (not the old one) without
+ * compiler extensions. T2C/GDBSTUB require proper atomics and would have
+ * data races on these platforms; they should be disabled when atomics are
+ * unavailable.
+ */
+#define ATOMIC_LOAD(ptr, order) (*(ptr))
+#define ATOMIC_STORE(ptr, val, order) ((void) (*(ptr) = (val)))
+#define ATOMIC_FETCH_ADD(ptr, val, order) \
+    ((*(ptr) += (val)) - (val)) /* return old value */
+#define ATOMIC_FETCH_SUB(ptr, val, order) \
+    ((*(ptr) -= (val)) + (val)) /* return old value */
+/* ATOMIC_EXCHANGE cannot return the old value without statement expressions.
+ * This returns the NEW value - callers must not rely on the return value. */
+#define ATOMIC_EXCHANGE(ptr, val, order) (*(ptr) = (val))
+/* ATOMIC_COMPARE_EXCHANGE_WEAK: non-atomic, single-threaded only */
+#define ATOMIC_COMPARE_EXCHANGE_WEAK(ptr, expected, desired, succ, fail) \
+    ((*(ptr) == *(expected)) ? (*(ptr) = (desired), 1) \
+                             : (*(expected) = *(ptr), 0))
+#endif
+
 /* Pattern Matching for C macros.
  * https://github.com/pfultz2/Cloak/wiki/C-Preprocessor-tricks,-tips,-and-idioms
  */
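Usage note: the patch itself only exercises ATOMIC_LOAD, ATOMIC_STORE, and ATOMIC_FETCH_ADD, so here is a minimal sketch of how the remaining macros compose, assuming src/common.h is included; `counter`, `claimed`, and `try_claim` are illustrative names, not part of the patch:

    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t counter;
    static bool claimed;

    /* Relaxed increment: atomicity only, no ordering, as used for the
     * block->n_invoke profiling counter. Returns the old value. */
    static uint32_t bump(void)
    {
        return ATOMIC_FETCH_ADD(&counter, 1, ATOMIC_RELAXED);
    }

    /* A weak CAS must be retried in a loop: it may fail spuriously even
     * when the expected value matches. On failure, the current value is
     * written back into 'expected'. */
    static bool try_claim(void)
    {
        bool expected = false;
        while (!ATOMIC_COMPARE_EXCHANGE_WEAK(&claimed, &expected, true,
                                             ATOMIC_ACQUIRE, ATOMIC_RELAXED)) {
            if (expected) /* another thread already claimed it */
                return false;
            /* spurious failure: 'expected' is still false, so retry */
        }
        return true;
    }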
T2C JIT will be disabled.") +#else +#warning "No atomic operations available. T2C JIT will be disabled." +#endif + +#define ATOMIC_RELAXED 0 +#define ATOMIC_ACQUIRE 0 +#define ATOMIC_RELEASE 0 +#define ATOMIC_SEQ_CST 0 + +/* Simple non-atomic fallback - only safe for single-threaded use. + * WARNING: ATOMIC_EXCHANGE returns new value (not old) without extensions. + * T2C/GDBSTUB require proper atomics and will have data races on these + * platforms - they should be disabled when atomics are unavailable. + */ +#define ATOMIC_LOAD(ptr, order) (*(ptr)) +#define ATOMIC_STORE(ptr, val, order) ((void) (*(ptr) = (val))) +#define ATOMIC_FETCH_ADD(ptr, val, order) \ + ((*(ptr) += (val)) - (val)) /* return old value */ +#define ATOMIC_FETCH_SUB(ptr, val, order) \ + ((*(ptr) -= (val)) + (val)) /* return old value */ +/* ATOMIC_EXCHANGE cannot return old value without statement expressions. + * This returns NEW value - callers must not rely on return value. */ +#define ATOMIC_EXCHANGE(ptr, val, order) (*(ptr) = (val)) +/* ATOMIC_COMPARE_EXCHANGE_WEAK: non-atomic, single-threaded only */ +#define ATOMIC_COMPARE_EXCHANGE_WEAK(ptr, expected, desired, succ, fail) \ + ((*(ptr) == *(expected)) ? (*(ptr) = (desired), 1) \ + : (*(expected) = *(ptr), 0)) +#endif + /* Pattern Matching for C macros. * https://github.com/pfultz2/Cloak/wiki/C-Preprocessor-tricks,-tips,-and-idioms */ diff --git a/src/emulate.c b/src/emulate.c index a283267c..bedf8753 100644 --- a/src/emulate.c +++ b/src/emulate.c @@ -2051,7 +2051,7 @@ void rv_step(void *arg) #if RV32_HAS(JIT) #if RV32_HAS(T2C) /* executed through the tier-2 JIT compiler */ - if (__atomic_load_n(&block->hot2, __ATOMIC_ACQUIRE)) { + if (ATOMIC_LOAD(&block->hot2, ATOMIC_ACQUIRE)) { /* Atomic load-acquire pairs with store-release in t2c_compile(). * Ensures we see the updated block->func after observing hot2=true. */ @@ -2063,17 +2063,25 @@ void rv_step(void *arg) prev = NULL; continue; } /* check if invoking times of t1 generated code exceed threshold */ - else if (!block->compiled && block->n_invoke >= THRESHOLD) { - block->compiled = true; + else if (!ATOMIC_LOAD(&block->compiled, ATOMIC_RELAXED) && + ATOMIC_LOAD(&block->n_invoke, ATOMIC_RELAXED) >= THRESHOLD) { + ATOMIC_STORE(&block->compiled, true, ATOMIC_RELAXED); queue_entry_t *entry = malloc(sizeof(queue_entry_t)); if (unlikely(!entry)) { /* Malloc failed - reset compiled flag to allow retry later */ - block->compiled = false; + ATOMIC_STORE(&block->compiled, false, ATOMIC_RELAXED); continue; } - entry->block = block; + /* Store cache key instead of pointer to prevent use-after-free */ +#if RV32_HAS(SYSTEM) + entry->key = + (uint64_t) block->pc_start | ((uint64_t) block->satp << 32); +#else + entry->key = (uint64_t) block->pc_start; +#endif pthread_mutex_lock(&rv->wait_queue_lock); list_add(&entry->list, &rv->wait_queue); + pthread_cond_signal(&rv->wait_queue_cond); pthread_mutex_unlock(&rv->wait_queue_lock); } #endif @@ -2085,7 +2093,11 @@ void rv_step(void *arg) * entry in compiled binary buffer. 
diff --git a/src/gdbstub.c b/src/gdbstub.c
index e7987f5e..996d74db 100644
--- a/src/gdbstub.c
+++ b/src/gdbstub.c
@@ -78,7 +78,7 @@ static int rv_write_mem(void *args, size_t addr, size_t len, void *val)
 
 static inline bool rv_is_interrupt(riscv_t *rv)
 {
-    return __atomic_load_n(&rv->is_interrupted, __ATOMIC_RELAXED);
+    return ATOMIC_LOAD(&rv->is_interrupted, ATOMIC_RELAXED);
 }
 
 static gdb_action_t rv_cont(void *args)
@@ -94,7 +94,7 @@ static gdb_action_t rv_cont(void *args)
     }
 
     /* Clear the interrupt if it's pending */
-    __atomic_store_n(&rv->is_interrupted, false, __ATOMIC_RELAXED);
+    ATOMIC_STORE(&rv->is_interrupted, false, ATOMIC_RELAXED);
 
     return ACT_RESUME;
 }
@@ -133,7 +133,7 @@ static void rv_on_interrupt(void *args)
     riscv_t *rv = (riscv_t *) args;
 
     /* Notify the emulator to break out the for loop in rv_cont */
-    __atomic_store_n(&rv->is_interrupted, true, __ATOMIC_RELAXED);
+    ATOMIC_STORE(&rv->is_interrupted, true, ATOMIC_RELAXED);
 }
 
 const struct target_ops gdbstub_ops = {
diff --git a/src/riscv.c b/src/riscv.c
index b14490f8..84b3b300 100644
--- a/src/riscv.c
+++ b/src/riscv.c
@@ -206,26 +206,41 @@ static pthread_t t2c_thread;
 static void *t2c_runloop(void *arg)
 {
     riscv_t *rv = (riscv_t *) arg;
+    pthread_mutex_lock(&rv->wait_queue_lock);
     while (!rv->quit) {
-        queue_entry_t *entry = NULL;
+        /* Wait for work or the quit signal */
+        while (list_empty(&rv->wait_queue) && !rv->quit)
+            pthread_cond_wait(&rv->wait_queue_cond, &rv->wait_queue_lock);
 
-        /* Acquire lock before checking and accessing the queue to prevent
-         * race conditions with the main thread adding entries.
-         */
-        pthread_mutex_lock(&rv->wait_queue_lock);
-        if (!list_empty(&rv->wait_queue)) {
-            entry = list_last_entry(&rv->wait_queue, queue_entry_t, list);
-            list_del_init(&entry->list);
-        }
+        if (rv->quit)
+            break;
+
+        /* Extract the work item while holding the lock */
+        queue_entry_t *entry =
+            list_last_entry(&rv->wait_queue, queue_entry_t, list);
+        list_del_init(&entry->list);
         pthread_mutex_unlock(&rv->wait_queue_lock);
 
-        if (entry) {
-            pthread_mutex_lock(&rv->cache_lock);
-            t2c_compile(rv, entry->block);
-            pthread_mutex_unlock(&rv->cache_lock);
-            free(entry);
-        }
+        /* Perform compilation with the cache lock held */
+        pthread_mutex_lock(&rv->cache_lock);
+        /* Look up the block by key; it might have been evicted meanwhile */
+        uint32_t pc = (uint32_t) entry->key;
+        block_t *block = (block_t *) cache_get(rv->block_cache, pc, false);
+#if RV32_HAS(SYSTEM)
+        /* Verify that SATP matches (for system mode) */
+        uint32_t satp = (uint32_t) (entry->key >> 32);
+        if (block && block->satp != satp)
+            block = NULL;
+#endif
+        /* Compile only if the block still exists in the cache */
+        if (block)
+            t2c_compile(rv, block);
+        pthread_mutex_unlock(&rv->cache_lock);
+        free(entry);
+
+        pthread_mutex_lock(&rv->wait_queue_lock);
     }
+    pthread_mutex_unlock(&rv->wait_queue_lock);
     return NULL;
 }
 #endif
@@ -851,6 +866,7 @@ riscv_t *rv_create(riscv_user_t rv_attr)
     /* prepare wait queue. */
     pthread_mutex_init(&rv->wait_queue_lock, NULL);
     pthread_mutex_init(&rv->cache_lock, NULL);
+    pthread_cond_init(&rv->wait_queue_cond, NULL);
     INIT_LIST_HEAD(&rv->wait_queue);
     /* activate the background compilation thread. */
     pthread_create(&t2c_thread, NULL, t2c_runloop, rv);
@@ -984,10 +1000,24 @@ void rv_delete(riscv_t *rv)
     block_map_destroy(rv);
 #else
 #if RV32_HAS(T2C)
+    /* Signal the thread to quit */
+    pthread_mutex_lock(&rv->wait_queue_lock);
     rv->quit = true;
+    pthread_cond_signal(&rv->wait_queue_cond);
+    pthread_mutex_unlock(&rv->wait_queue_lock);
+
     pthread_join(t2c_thread, NULL);
+
+    /* Clean up any remaining entries in the wait queue */
+    queue_entry_t *entry, *safe;
+    list_for_each_entry_safe (entry, safe, &rv->wait_queue, list) {
+        list_del(&entry->list);
+        free(entry);
+    }
+
     pthread_mutex_destroy(&rv->wait_queue_lock);
     pthread_mutex_destroy(&rv->cache_lock);
+    pthread_cond_destroy(&rv->wait_queue_cond);
     jit_cache_exit(rv->jit_cache);
 #endif
     jit_state_exit(rv->jit_state);
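Stripped of emulator specifics, t2c_runloop() now follows the textbook mutex-plus-condition-variable consumer idiom. A self-contained sketch under hypothetical names (`work_t`, `head`, `quit`, `worker`):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct work {
        struct work *next;
    } work_t;

    static work_t *head;   /* queue shared with the producer */
    static bool quit;      /* termination flag, protected by 'lock' */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    static void *worker(void *arg)
    {
        (void) arg;
        pthread_mutex_lock(&lock);
        while (!quit) {
            /* Re-check the predicate on every wakeup: pthread_cond_wait()
             * may return spuriously, so waiting must happen in a loop. */
            while (!head && !quit)
                pthread_cond_wait(&cond, &lock);
            if (quit)
                break;
            work_t *w = head;
            head = w->next;
            pthread_mutex_unlock(&lock); /* drop the lock while processing */
            free(w);                     /* process(w) would go here */
            pthread_mutex_lock(&lock);
        }
        pthread_mutex_unlock(&lock);
        return NULL;
    }

The producer side (rv_step() enqueueing work, rv_delete() setting quit) mutates the predicate under the same mutex before signalling, which is what makes the wakeup race-free.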
diff --git a/src/riscv_private.h b/src/riscv_private.h
index eab25648..ecaacfef 100644
--- a/src/riscv_private.h
+++ b/src/riscv_private.h
@@ -132,7 +132,7 @@ typedef struct block {
 /* T2C implies JIT (enforced by Kconfig and feature.h) */
 #if RV32_HAS(T2C)
 typedef struct {
-    block_t *block;
+    uint64_t key; /**< cache key (PC or PC|SATP) to look up the block */
     struct list_head list;
 } queue_entry_t;
 #endif
@@ -253,7 +253,8 @@ struct riscv_internal {
 #if RV32_HAS(T2C)
     struct list_head wait_queue;
     pthread_mutex_t wait_queue_lock, cache_lock;
-    volatile bool quit; /**< Determine the main thread is terminated or not */
+    pthread_cond_t wait_queue_cond;
+    bool quit; /**< termination flag, protected by wait_queue_lock */
 #endif
     void *jit_state;
     void *jit_cache;
diff --git a/src/t2c.c b/src/t2c.c
index 83b2be86..12ef1f8b 100644
--- a/src/t2c.c
+++ b/src/t2c.c
@@ -269,6 +269,10 @@ static void t2c_trace_ebb(LLVMBuilderRef *builder,
 
 void t2c_compile(riscv_t *rv, block_t *block)
 {
+    /* Skip if already compiled (defensive check) */
+    if (ATOMIC_LOAD(&block->hot2, ATOMIC_ACQUIRE))
+        return;
+
     LLVMModuleRef module = LLVMModuleCreateWithName("my_module");
     /* FIXME: riscv_t structure would change according to different
      * configuration. The linked block might jump to the wrong function pointer.
@@ -350,7 +354,7 @@ void t2c_compile(riscv_t *rv, block_t *block)
      * are visible to other threads before they observe hot2=true.
      * Pairs with atomic load-acquire in rv_step().
      */
-    __atomic_store_n(&block->hot2, true, __ATOMIC_RELEASE);
+    ATOMIC_STORE(&block->hot2, true, ATOMIC_RELEASE);
 }
 
 struct jit_cache *jit_cache_init()
@@ -373,7 +377,7 @@ void jit_cache_update(struct jit_cache *cache, uint64_t key, void *entry)
      * entry is guaranteed to be visible.
      */
     cache[pos].entry = entry;
-    __atomic_store_n(&cache[pos].key, key, __ATOMIC_RELEASE);
+    ATOMIC_STORE(&cache[pos].key, key, ATOMIC_RELEASE);
 }
 
 void jit_cache_clear(struct jit_cache *cache)
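The key-last publication order in jit_cache_update() is the same release/acquire pattern as the hot2 flag: the entry pointer is written first, then the key is store-released, so any reader that acquire-loads a matching key is guaranteed to see the entry. A reader-side sketch (`slot_publish` and `slot_lookup` are hypothetical; the actual lookup lives in the JIT-generated code):

    #include <stdint.h>

    struct jit_cache_slot {
        uint64_t key;
        void *entry;
    };

    /* Writer: fill the entry first, then publish the key with release
     * semantics, mirroring jit_cache_update(). */
    static void slot_publish(struct jit_cache_slot *slot, uint64_t key,
                             void *fn)
    {
        slot->entry = fn;
        ATOMIC_STORE(&slot->key, key, ATOMIC_RELEASE);
    }

    /* Reader: acquire-load the key; on a match, the entry written before
     * the release store is guaranteed to be visible. */
    static void *slot_lookup(struct jit_cache_slot *slot, uint64_t key)
    {
        if (ATOMIC_LOAD(&slot->key, ATOMIC_ACQUIRE) != key)
            return NULL;
        return slot->entry;
    }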