diff --git a/Jenkinsfile b/Jenkinsfile
index eb33f5d27a8..f591e796e36 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -78,7 +78,8 @@ def bootCheriBSDForAllArchitectures(params, String qemuConfig, boolean isDebug)
         addBootJobs(bootJobs, params, qemuConfig, architecture, "main")
         if (!isDebug) {
             // For the non-ASAN build of QEMU we also boot the latest release
-            addBootJobs(bootJobs, params, qemuConfig, architecture, "releng%252F22.12", "-latest-release")
+            def latestRelease = cheribsdInfo.getReleasedVersions()[-1]
+            addBootJobs(bootJobs, params, qemuConfig, architecture, "releng/${latestRelease}", "-latest-release")
         }
         def targetBranch = env.CHANGE_TARGET ? env.CHANGE_TARGET : env.BRANCH_NAME;
         if (targetBranch == 'dev') {
@@ -106,7 +107,8 @@ def bootCheriBSD(params, String qemuConfig, String stageSuffix, String archSuffi
     def compressedDiskImage = "artifacts-${archSuffix}/cheribsd-${archSuffix}.img.xz"
     dir (stageSuffix) {
         sh "rm -rfv artifacts-${archSuffix}/cheribsd-*.img* artifacts-${archSuffix}/kernel*"
-        copyArtifacts projectName: "CheriBSD-pipeline/${cheribsdBranch}", filter: "${compressedDiskImage}, ${compressedKernel}",
+        def jenkinsBranchName = cheribsdBranch.replaceAll('/', '%2F')
+        copyArtifacts projectName: "CheriBSD-pipeline/${jenkinsBranchName}", filter: "${compressedDiskImage}, ${compressedKernel}",
                       target: '.', fingerprintArtifacts: false, flatten: false, selector: lastSuccessful()
     }
     def testExtraArgs = [
diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h
new file mode 100644
index 00000000000..1558a826aa0
--- /dev/null
+++ b/include/qemu/coroutine-tls.h
@@ -0,0 +1,165 @@
+/*
+ * QEMU Thread Local Storage for coroutines
+ *
+ * Copyright Red Hat
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ * It is forbidden to access Thread Local Storage in coroutines because
+ * compiler optimizations may cause values to be cached across coroutine
+ * re-entry. Coroutines can run in more than one thread through the course of
+ * their life, leading to bugs when stale TLS values from the wrong thread are
+ * used as a result of compiler optimization.
+ *
+ * An example is:
+ *
+ * .. code-block:: c
+ *  :caption: A coroutine that may see the wrong TLS value
+ *
+ *  static __thread AioContext *current_aio_context;
+ *  ...
+ *  static void coroutine_fn foo(void)
+ *  {
+ *      aio_notify(current_aio_context);
+ *      qemu_coroutine_yield();
+ *      aio_notify(current_aio_context); // <-- may be stale after yielding!
+ *  }
+ *
+ * This header provides macros for safely defining variables in Thread Local
+ * Storage:
+ *
+ * .. code-block:: c
+ *  :caption: A coroutine that safely uses TLS
+ *
+ *  QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context)
+ *  ...
+ *  static void coroutine_fn foo(void)
+ *  {
+ *      aio_notify(get_current_aio_context());
+ *      qemu_coroutine_yield();
+ *      aio_notify(get_current_aio_context()); // <-- safe
+ *  }
+ */
+
+#ifndef QEMU_COROUTINE_TLS_H
+#define QEMU_COROUTINE_TLS_H
+
+/*
+ * To stop the compiler from caching TLS values we define accessor functions
+ * with __attribute__((noinline)) plus asm volatile("") to prevent
+ * optimizations that override noinline.
+ *
+ * The compiler can still analyze noinline code and make optimizations based on
+ * that knowledge, so an inline asm output operand is used to prevent
+ * optimizations that make assumptions about the address of the TLS variable.
+ *
+ * This is fragile and ultimately needs to be solved by a mechanism that is
+ * guaranteed to work by the compiler (e.g. stackless coroutines), but for now
+ * we use this approach to prevent issues.
+ */
+
+/**
+ * QEMU_DECLARE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Declare an extern variable in Thread Local Storage from a header file:
+ *
+ * .. code-block:: c
+ *  :caption: Declaring an extern variable in Thread Local Storage
+ *
+ *  QEMU_DECLARE_CO_TLS(int, my_count)
+ *  ...
+ *  int c = get_my_count();
+ *  set_my_count(c + 1);
+ *  *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ *  :caption: Declaring a TLS variable using __thread
+ *
+ *  extern __thread int my_count;
+ *  ...
+ *  int c = my_count;
+ *  my_count = c + 1;
+ *  *(&my_count) = 0;
+ */
+#define QEMU_DECLARE_CO_TLS(type, var)                      \
+    __attribute__((noinline)) type get_##var(void);        \
+    __attribute__((noinline)) void set_##var(type v);      \
+    __attribute__((noinline)) type *get_ptr_##var(void);
+
+/**
+ * QEMU_DEFINE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a variable in Thread Local Storage that was previously declared from
+ * a header file with QEMU_DECLARE_CO_TLS():
+ *
+ * .. code-block:: c
+ *  :caption: Defining a variable in Thread Local Storage
+ *
+ *  QEMU_DEFINE_CO_TLS(int, my_count)
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ *  :caption: Defining a TLS variable using __thread
+ *
+ *  __thread int my_count;
+ */
+#define QEMU_DEFINE_CO_TLS(type, var)                                         \
+    static __thread type co_tls_##var;                                        \
+    type get_##var(void) { asm volatile(""); return co_tls_##var; }           \
+    void set_##var(type v) { asm volatile(""); co_tls_##var = v; }            \
+    type *get_ptr_##var(void)                                                 \
+    { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+/**
+ * QEMU_DEFINE_STATIC_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a static variable in Thread Local Storage:
+ *
+ * .. code-block:: c
+ *  :caption: Defining a static variable in Thread Local Storage
+ *
+ *  QEMU_DEFINE_STATIC_CO_TLS(int, my_count)
+ *  ...
+ *  int c = get_my_count();
+ *  set_my_count(c + 1);
+ *  *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ *  :caption: Defining a static TLS variable using __thread
+ *
+ *  static __thread int my_count;
+ *  ...
+ *  int c = my_count;
+ *  my_count = c + 1;
+ *  *(&my_count) = 0;
+ */
+#define QEMU_DEFINE_STATIC_CO_TLS(type, var)                                  \
+    static __thread type co_tls_##var;                                        \
+    static __attribute__((noinline, unused))                                  \
+    type get_##var(void)                                                      \
+    { asm volatile(""); return co_tls_##var; }                                \
+    static __attribute__((noinline, unused))                                  \
+    void set_##var(type v)                                                    \
+    { asm volatile(""); co_tls_##var = v; }                                   \
+    static __attribute__((noinline, unused))                                  \
+    type *get_ptr_##var(void)                                                 \
+    { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+#endif /* QEMU_COROUTINE_TLS_H */
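For readers of the new header, a minimal usage sketch of the generated accessors. The variable name request_count is hypothetical and not part of the patch; only the macros from coroutine-tls.h above are assumed:

    #include "qemu/coroutine-tls.h"

    /* Hypothetical per-thread counter, purely illustrative. */
    QEMU_DEFINE_STATIC_CO_TLS(unsigned int, request_count)

    static void bump_request_count(void)
    {
        /* Each accessor call re-reads TLS through a noinline function, so the
         * result stays correct even if the caller is a coroutine that moved
         * to another thread since the previous call. */
        set_request_count(get_request_count() + 1);

        /* The pointer form is for code that updates fields in place; the
         * returned pointer must not be cached across a yield. */
        unsigned int *p = get_ptr_request_count();
        (*p)++;
    }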
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 515d327cf11..15b7bc2d67c 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -28,6 +28,7 @@
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
 #include "qemu/sys_membarrier.h"
+#include "qemu/coroutine-tls.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -68,11 +69,11 @@ struct rcu_reader_data {
     QLIST_ENTRY(rcu_reader_data) node;
 };
 
-extern __thread struct rcu_reader_data rcu_reader;
+QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader)
 
 static inline void rcu_read_lock(void)
 {
-    struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+    struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
     unsigned ctr;
 
     if (p_rcu_reader->depth++ > 0) {
@@ -88,7 +89,7 @@ static inline void rcu_read_lock(void)
 
 static inline void rcu_read_unlock(void)
 {
-    struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+    struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
 
     assert(p_rcu_reader->depth != 0);
     if (--p_rcu_reader->depth > 0) {
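With this change the header no longer exports the rcu_reader object itself; it declares three per-thread accessors instead. Expanded by hand from the macro added in coroutine-tls.h, QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader) amounts to:

    __attribute__((noinline)) struct rcu_reader_data get_rcu_reader(void);
    __attribute__((noinline)) void set_rcu_reader(struct rcu_reader_data v);
    __attribute__((noinline)) struct rcu_reader_data *get_ptr_rcu_reader(void);

The inline rcu_read_lock()/rcu_read_unlock() only need get_ptr_rcu_reader(), and they call it on every invocation instead of caching the address of a __thread object, which is the whole point of the indirection.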
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index a7ee431187a..581b7d73da1 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "monitor/monitor.h"
+#include "qemu/coroutine-tls.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-machine.h"
 #include "qapi/qapi-commands-misc.h"
@@ -472,11 +473,11 @@ bool qemu_in_vcpu_thread(void)
     return current_cpu && qemu_cpu_is_self(current_cpu);
 }
 
-static __thread bool iothread_locked = false;
+QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked)
 
 bool qemu_mutex_iothread_locked(void)
 {
-    return iothread_locked;
+    return get_iothread_locked();
 }
 
 /*
@@ -489,13 +490,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line)
 
     g_assert(!qemu_mutex_iothread_locked());
     bql_lock(&qemu_global_mutex, file, line);
-    iothread_locked = true;
+    set_iothread_locked(true);
 }
 
 void qemu_mutex_unlock_iothread(void)
 {
     g_assert(qemu_mutex_iothread_locked());
-    iothread_locked = false;
+    set_iothread_locked(false);
     qemu_mutex_unlock(&qemu_global_mutex);
 }
 
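For cpus.c the interesting part is what the new definition expands to. Approximately, and reformatted from the macro in coroutine-tls.h, QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) generates:

    static __thread bool co_tls_iothread_locked;

    static __attribute__((noinline, unused)) bool get_iothread_locked(void)
    {
        asm volatile("");                /* defeat optimizations around noinline */
        return co_tls_iothread_locked;
    }

    static __attribute__((noinline, unused)) void set_iothread_locked(bool v)
    {
        asm volatile("");
        co_tls_iothread_locked = v;
    }

    static __attribute__((noinline, unused)) bool *get_ptr_iothread_locked(void)
    {
        bool *ptr = &co_tls_iothread_locked;
        asm volatile("" : "+rm" (ptr));  /* hide the address from the optimizer */
        return ptr;
    }

Note the implicit zero-initialization replaces the old explicit "= false".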
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d28a934133b..4bbc3b6eded 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -244,6 +244,11 @@ typedef uint64_t AARCH_REG_TYPE;
 #define N_BANK_WITH_RESTRICTED 4
 #endif
 
+extern const char * const arm32_regnames[16];
+#ifdef TARGET_AARCH64
+extern const char * const arm64_regnames[32];
+#endif
+
 typedef struct CPUARMState {
     /* Regs for current mode.  */
     uint32_t regs[16];
@@ -3574,6 +3579,7 @@ typedef CPUARMState CPUArchState;
 typedef ARMCPU ArchCPU;
 
 #include "exec/cpu-all.h"
+#include "exec/log_instr.h"
 #include "cpu_cheri.h"
 #include "cheri-lazy-capregs.h"
 
@@ -3594,11 +3600,15 @@ static inline void arm_set_xreg(CPUARMState *env, int regnum,
 #ifdef TARGET_CHERI
     update_capreg_to_intval(env, regnum, value);
 #else
+#ifdef TARGET_AARCH64
     if (is_a64(env)) {
         env->xregs[regnum] = value;
-    } else {
-        env->regs[regnum] = value;
+        qemu_log_instr_reg(env, arm64_regnames[regnum], value);
+        return;
     }
+#endif
+    env->regs[regnum] = value;
+    qemu_log_instr_reg(env, arm32_regnames[regnum], value);
 #endif
 }
diff --git a/target/arm/helper.c b/target/arm/helper.c
index b94a8b1ca62..1a9c00370cc 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -14033,11 +14033,18 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
 #endif
 
 #ifdef CONFIG_TCG_LOG_INSTR
-void HELPER(arm_log_instr)(CPUARMState *env, target_ulong pc, uint32_t opcode)
+void HELPER(arm_log_instr)(CPUARMState *env, uint64_t pc, uint32_t opcode,
+                           uint32_t opcode_size)
 {
     if (qemu_log_instr_enabled(env)) {
         qemu_log_instr_asid(env, cpu_get_asid(env, pc));
-        qemu_log_instr(env, pc, (char *)&opcode, sizeof(opcode));
+        if (opcode_size == 2) {
+            uint16_t opcode16 = opcode;
+            qemu_log_instr(env, pc, (char *)&opcode16, opcode_size);
+        } else {
+            tcg_debug_assert(opcode_size == 4);
+            qemu_log_instr(env, pc, (char *)&opcode, opcode_size);
+        }
     }
 }
 #endif
diff --git a/target/arm/helper.h b/target/arm/helper.h
index ac5e8438fcd..2fee3bd47a0 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -938,7 +938,7 @@ DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
 #endif
 
 #ifdef CONFIG_TCG_LOG_INSTR
-DEF_HELPER_FLAGS_3(arm_log_instr, TCG_CALL_NO_WG, void, env, tl, i32)
+DEF_HELPER_FLAGS_4(arm_log_instr, TCG_CALL_NO_WG, void, env, i64, i32, i32)
 #endif
 
 #ifdef TARGET_CHERI
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 99452d7a3bd..0967bc6401f 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -453,6 +453,7 @@ void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
         env->usr_regs[regno - 8] = val;
     } else {
         env->regs[regno] = val;
+        qemu_log_instr_reg(env, arm32_regnames[regno], val);
     }
 }
 
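The widened helper above now receives the encoded instruction size, so 16-bit Thumb opcodes are logged with the correct width instead of always emitting four bytes. A standalone sketch of the same width handling; trace_bytes() and log_opcode() are hypothetical stand-ins for qemu_log_instr(), not code from the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical sink standing in for qemu_log_instr(). */
    static void trace_bytes(const void *buf, size_t len)
    {
        fwrite(buf, 1, len, stderr);
    }

    static void log_opcode(uint32_t opcode, uint32_t opcode_size)
    {
        if (opcode_size == 2) {
            /* Go through a uint16_t so only two bytes are emitted, matching
             * the width of a 16-bit Thumb encoding. */
            uint16_t opcode16 = opcode;
            trace_bytes(&opcode16, sizeof(opcode16));
        } else {
            trace_bytes(&opcode, sizeof(opcode));
        }
    }

    int main(void)
    {
        log_opcode(0x4770, 2);      /* Thumb "bx lr": two bytes logged  */
        log_opcode(0xd65f03c0, 4);  /* A64 "ret": four bytes logged     */
        return 0;
    }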
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index dbc8ace0422..37589355b14 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -123,7 +123,7 @@ static inline bool get_sctlr_sa(DisasContext *ctx)
 /* Load/store exclusive handling */
 static TCGv_i64 cpu_exclusive_high;
 
-static const char *regnames[] = {
+const char * const arm64_regnames[32] = {
     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
@@ -169,11 +169,11 @@ void a64_translate_init(void)
         _cpu_cursors_do_not_access_directly[i] = tcg_global_mem_new(
             cpu_env,
             offsetof(CPUARMState, gpcapregs.decompressed[i].cap._cr_cursor),
-            regnames[i]);
+            arm64_regnames[i]);
 #else
         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                           offsetof(CPUARMState, xregs[i]),
-                                          regnames[i]);
+                                          arm64_regnames[i]);
 #endif
     }
 
@@ -15304,11 +15304,8 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
 
 #if defined(CONFIG_TCG_LOG_INSTR)
     if (unlikely(s->base.log_instr_enabled)) {
-        TCGv pc = tcg_const_tl(s->base.pc_next);
-        TCGv_i32 opc = tcg_const_i32(insn);
-        gen_helper_arm_log_instr(cpu_env, pc, opc);
-        tcg_temp_free(pc);
-        tcg_temp_free_i32(opc);
+        gen_helper_arm_log_instr(cpu_env, tcg_constant_i64(s->pc_curr),
+                                 tcg_constant_i32(insn), tcg_constant_i32(4));
     }
 #endif
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 2ca6c847d83..728a77473c6 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -98,7 +98,7 @@ TCGv_i64 cpu_exclusive_val;
 
 #include "exec/gen-icount.h"
 
-static const char * const regnames[] =
+const char * const arm32_regnames[16] =
     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
 
@@ -116,7 +116,7 @@ void arm_translate_init(void)
     for (i = 0; i < 16; i++) {
         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
                                           offsetof(CPUARMState, regs[i]),
-                                          regnames[i]);
+                                          arm32_regnames[i]);
     }
     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
@@ -315,6 +315,16 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
         s->base.is_jmp = DISAS_JUMP;
     }
     tcg_gen_mov_i32(cpu_R[reg], var);
+#ifdef CONFIG_TCG_LOG_INSTR
+    if (qemu_ctx_logging_enabled(s)) {
+        TCGv_ptr name = tcg_const_ptr(arm32_regnames[reg]);
+        TCGv new_val = tcg_temp_new();
+        tcg_gen_extu_i32_tl(new_val, var);
+        gen_helper_qemu_log_instr_reg(cpu_env, name, new_val);
+        tcg_temp_free(new_val);
+        tcg_temp_free_ptr(name);
+    }
+#endif
     tcg_temp_free_i32(var);
 }
 
@@ -9110,9 +9120,8 @@ static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
 
 #if defined(CONFIG_TCG_LOG_INSTR)
     if (unlikely(dcbase->log_instr_enabled)) {
-        TCGv pc = tcg_const_tl(dcbase->pc_next);
-        gen_helper_arm_log_instr(cpu_env, pc, tcg_constant_i32(insn));
-        tcg_temp_free(pc);
+        gen_helper_arm_log_instr(cpu_env, tcg_constant_i64(dc->pc_curr),
+                                 tcg_constant_i32(insn), tcg_constant_i32(4));
     }
 #endif
 
@@ -9193,6 +9202,16 @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
     }
     dc->insn = insn;
 
+#if defined(CONFIG_TCG_LOG_INSTR)
+    if (unlikely(dcbase->log_instr_enabled)) {
+        /* For Thumb we have to undo the 16-bit swap above for disassembly. */
+        gen_helper_arm_log_instr(
+            cpu_env, tcg_constant_i64(dc->pc_curr),
+            tcg_constant_i32(is_16bit ? insn : rol32(insn, 16)),
+            tcg_constant_i32(is_16bit ? 2 : 4));
+    }
+#endif
+
     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
         uint32_t cond = dc->condexec_cond;
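A standalone sketch of why the Thumb path rotates 32-bit encodings by 16 before logging them. The translator holds a 32-bit Thumb instruction as (first halfword << 16) | second halfword; rotating by 16 puts the first halfword back in the low half so the logged bytes follow guest memory order. The specific encoding used below is only an example:

    #include <stdint.h>
    #include <stdio.h>

    static inline uint32_t rol32(uint32_t word, unsigned int shift)
    {
        return (word << shift) | (word >> ((32 - shift) & 31));
    }

    int main(void)
    {
        /* e.g. DSB SY, encoded as the halfwords f3bf 8f4f in guest memory */
        uint16_t first = 0xf3bf, second = 0x8f4f;
        uint32_t insn = ((uint32_t)first << 16) | second;

        printf("as held by the translator: %08x\n", insn);
        printf("as passed to the log helper: %08x\n", rol32(insn, 16));
        return 0;
    }

Separately, the calls now pass tcg_constant_i64()/tcg_constant_i32() values; unlike the old tcg_const_*() temporaries these are pooled constants that must not be freed, which is why the tcg_temp_free() calls disappear.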
diff --git a/tests/unit/rcutorture.c b/tests/unit/rcutorture.c
index de6f649058e..495a4e6f420 100644
--- a/tests/unit/rcutorture.c
+++ b/tests/unit/rcutorture.c
@@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg)
 
     rcu_register_thread();
 
-    *(struct rcu_reader_data **)arg = &rcu_reader;
+    *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
     qatomic_inc(&nthreadsrunning);
     while (goflag == GOFLAG_INIT) {
         g_usleep(1000);
@@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg)
 
     rcu_register_thread();
 
-    *(struct rcu_reader_data **)arg = &rcu_reader;
+    *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
     qatomic_inc(&nthreadsrunning);
     while (goflag == GOFLAG_INIT) {
         g_usleep(1000);
@@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg)
 
     rcu_register_thread();
 
-    *(struct rcu_reader_data **)arg = &rcu_reader;
+    *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
     while (goflag == GOFLAG_INIT) {
         g_usleep(1000);
     }
@@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg)
     struct rcu_stress *cp = qatomic_read(&rcu_stress_current);
 
     rcu_register_thread();
-    *(struct rcu_reader_data **)arg = &rcu_reader;
+    *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
 
     while (goflag == GOFLAG_INIT) {
         g_usleep(1000);
@@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg)
 {
     rcu_register_thread();
 
-    *(struct rcu_reader_data **)arg = &rcu_reader;
+    *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
     while (goflag == GOFLAG_INIT) {
         g_usleep(1000);
     }
diff --git a/tests/unit/test-rcu-list.c b/tests/unit/test-rcu-list.c
index 49641e19366..64b81ae0581 100644
--- a/tests/unit/test-rcu-list.c
+++ b/tests/unit/test-rcu-list.c
@@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg)
 
     rcu_register_thread();
 
-    *(struct rcu_reader_data **)arg = &rcu_reader;
+    *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
     qatomic_inc(&nthreadsrunning);
     while (qatomic_read(&goflag) == GOFLAG_INIT) {
         g_usleep(1000);
@@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg)
     long long n_removed_local = 0;
     struct list_element *el, *prev_el;
 
-    *(struct rcu_reader_data **)arg = &rcu_reader;
+    *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
     qatomic_inc(&nthreadsrunning);
     while (qatomic_read(&goflag) == GOFLAG_INIT) {
         g_usleep(1000);
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
index 904b375192c..127d5a13c8e 100644
--- a/util/coroutine-ucontext.c
+++ b/util/coroutine-ucontext.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include <ucontext.h>
 #include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
 
 #ifdef CONFIG_VALGRIND_H
 #include <valgrind/valgrind.h>
@@ -66,8 +67,8 @@ typedef struct {
 /**
  * Per-thread coroutine bookkeeping
  */
-static __thread CoroutineUContext leader;
-static __thread Coroutine *current;
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader);
 
 /*
  * va_args to makecontext() must be type 'int', so passing
@@ -97,14 +98,15 @@ static inline __attribute__((always_inline))
 void finish_switch_fiber(void *fake_stack_save)
 {
 #ifdef CONFIG_ASAN
+    CoroutineUContext *leaderp = get_ptr_leader();
     const void *bottom_old;
     size_t size_old;
 
     __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old);
 
-    if (!leader.stack) {
-        leader.stack = (void *)bottom_old;
-        leader.stack_size = size_old;
+    if (!leaderp->stack) {
+        leaderp->stack = (void *)bottom_old;
+        leaderp->stack_size = size_old;
     }
 #endif
 #ifdef CONFIG_TSAN
@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1)
 
     /* Initialize longjmp environment and switch back the caller */
     if (!sigsetjmp(self->env, 0)) {
-        start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack,
-                                leader.stack_size);
+        CoroutineUContext *leaderp = get_ptr_leader();
+
+        start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save,
+                                leaderp->stack, leaderp->stack_size);
         start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */
         siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
     }
@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
     int ret;
     void *fake_stack_save = NULL;
 
-    current = to_;
+    set_current(to_);
 
     ret = sigsetjmp(from->env, 0);
     if (ret == 0) {
@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
 
 Coroutine *qemu_coroutine_self(void)
 {
-    if (!current) {
-        current = &leader.base;
+    Coroutine *self = get_current();
+    CoroutineUContext *leaderp = get_ptr_leader();
+
+    if (!self) {
+        self = &leaderp->base;
+        set_current(self);
     }
 #ifdef CONFIG_TSAN
-    if (!leader.tsan_co_fiber) {
-        leader.tsan_co_fiber = __tsan_get_current_fiber();
+    if (!leaderp->tsan_co_fiber) {
+        leaderp->tsan_co_fiber = __tsan_get_current_fiber();
     }
 #endif
-    return current;
+    return self;
 }
 
 bool qemu_in_coroutine(void)
 {
-    return current && current->caller;
+    Coroutine *self = get_current();
+
+    return self && self->caller;
 }
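A small standalone model of the "leader"/"current" relationship the reworked qemu_coroutine_self() relies on. The types and accessor functions below are simplified stand-ins (the real accessors come from QEMU_DEFINE_STATIC_CO_TLS and are noinline with asm barriers); the point is only that TLS is re-read through a function on every call and never cached:

    #include <assert.h>
    #include <stdbool.h>

    typedef struct Coroutine { struct Coroutine *caller; } Coroutine;
    typedef struct { Coroutine base; } CoroutineUContext;

    /* Stand-ins for the generated accessors. */
    static __thread CoroutineUContext tls_leader;
    static __thread Coroutine *tls_current;
    static Coroutine *get_current(void) { return tls_current; }
    static void set_current(Coroutine *co) { tls_current = co; }
    static CoroutineUContext *get_ptr_leader(void) { return &tls_leader; }

    /* Mirrors the reworked qemu_coroutine_self(): fetch TLS once per call. */
    static Coroutine *coroutine_self(void)
    {
        Coroutine *self = get_current();

        if (!self) {
            self = &get_ptr_leader()->base;   /* bare thread: the leader */
            set_current(self);
        }
        return self;
    }

    static bool in_coroutine(void)
    {
        Coroutine *self = get_current();
        return self && self->caller;          /* the leader has no caller */
    }

    int main(void)
    {
        assert(!in_coroutine());
        assert(coroutine_self() == &get_ptr_leader()->base);
        return 0;
    }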
diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c
index de6bd4fd3e4..c02a62c8969 100644
--- a/util/coroutine-win32.c
+++ b/util/coroutine-win32.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
 
 typedef struct
 {
@@ -34,8 +35,8 @@ typedef struct
     CoroutineAction action;
 } CoroutineWin32;
 
-static __thread CoroutineWin32 leader;
-static __thread Coroutine *current;
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader);
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
 
 /* This function is marked noinline to prevent GCC from inlining it
  * into coroutine_trampoline().  If we allow it to do that then it
@@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
     CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
     CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
 
-    current = to_;
+    set_current(to_);
 
     to->action = action;
     SwitchToFiber(to->fiber);
@@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_)
 
 Coroutine *qemu_coroutine_self(void)
 {
+    Coroutine *current = get_current();
+
     if (!current) {
-        current = &leader.base;
-        leader.fiber = ConvertThreadToFiber(NULL);
+        CoroutineWin32 *leader = get_ptr_leader();
+
+        current = &leader->base;
+        set_current(current);
+        leader->fiber = ConvertThreadToFiber(NULL);
     }
     return current;
 }
 
 bool qemu_in_coroutine(void)
 {
+    Coroutine *current = get_current();
+
     return current && current->caller;
 }
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 38fb6d3084d..22071dac4b8 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -18,6 +18,7 @@
 #include "qemu/atomic.h"
 #include "qemu/coroutine.h"
 #include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
 #include "block/aio.h"
 
 enum {
@@ -27,17 +28,20 @@ enum {
 
 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
 static unsigned int release_pool_size;
-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
-static __thread unsigned int alloc_pool_size;
-static __thread Notifier coroutine_pool_cleanup_notifier;
+
+typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool);
+QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size);
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier);
 
 static void coroutine_pool_cleanup(Notifier *n, void *value)
 {
     Coroutine *co;
     Coroutine *tmp;
+    CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
 
-    QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
-        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+    QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) {
+        QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
         qemu_coroutine_delete(co);
     }
 }
@@ -47,27 +51,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
     Coroutine *co = NULL;
 
     if (CONFIG_COROUTINE_POOL) {
-        co = QSLIST_FIRST(&alloc_pool);
+        CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
+
+        co = QSLIST_FIRST(alloc_pool);
         if (!co) {
             if (release_pool_size > POOL_BATCH_SIZE) {
                 /* Slow path; a good place to register the destructor, too.  */
-                if (!coroutine_pool_cleanup_notifier.notify) {
-                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
-                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+                Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
+                if (!notifier->notify) {
+                    notifier->notify = coroutine_pool_cleanup;
+                    qemu_thread_atexit_add(notifier);
                 }
 
                 /* This is not exact; there could be a little skew between
                  * release_pool_size and the actual size of release_pool.  But
                  * it is just a heuristic, it does not need to be perfect.
                  */
-                alloc_pool_size = qatomic_xchg(&release_pool_size, 0);
-                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
-                co = QSLIST_FIRST(&alloc_pool);
+                set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0));
+                QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool);
+                co = QSLIST_FIRST(alloc_pool);
             }
         }
         if (co) {
-            QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
-            alloc_pool_size--;
+            QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
+            set_alloc_pool_size(get_alloc_pool_size() - 1);
         }
     }
 
@@ -91,9 +98,9 @@ static void coroutine_delete(Coroutine *co)
             qatomic_inc(&release_pool_size);
             return;
         }
-        if (alloc_pool_size < POOL_BATCH_SIZE) {
-            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
-            alloc_pool_size++;
+        if (get_alloc_pool_size() < POOL_BATCH_SIZE) {
+            QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
+            set_alloc_pool_size(get_alloc_pool_size() + 1);
             return;
         }
     }
diff --git a/util/rcu.c b/util/rcu.c
index 13ac0f75cb2..40062b8ed20 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -64,7 +64,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
 /* Written to only by each individual reader. Read by both the reader and the
  * writers.
  */
-__thread struct rcu_reader_data rcu_reader;
+QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader)
 
 /* Protected by rcu_registry_lock. */
 typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
@@ -350,16 +350,16 @@ void drain_call_rcu(void)
 
 void rcu_register_thread(void)
 {
-    assert(rcu_reader.ctr == 0);
+    assert(get_ptr_rcu_reader()->ctr == 0);
     qemu_mutex_lock(&rcu_registry_lock);
-    QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+    QLIST_INSERT_HEAD(&registry, get_ptr_rcu_reader(), node);
     qemu_mutex_unlock(&rcu_registry_lock);
 }
 
 void rcu_unregister_thread(void)
 {
     qemu_mutex_lock(&rcu_registry_lock);
-    QLIST_REMOVE(&rcu_reader, node);
+    QLIST_REMOVE(get_ptr_rcu_reader(), node);
     qemu_mutex_unlock(&rcu_registry_lock);
 }
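For context on the qemu-coroutine.c hunks, a standalone sketch of the per-thread free-list idea behind alloc_pool/alloc_pool_size. The names PoolObj, pool_get() and pool_put() are illustrative only, plain __thread is used purely to keep the sketch self-contained, and the global release_pool handoff is omitted:

    #include <stdlib.h>

    #define POOL_BATCH_SIZE 64

    typedef struct PoolObj {
        struct PoolObj *next;
    } PoolObj;

    /* Stand-ins for the thread-local list and size counter accessed through
     * the get_/set_/get_ptr_ accessors in the patch. */
    static __thread PoolObj *tls_pool;
    static __thread unsigned tls_pool_size;

    static PoolObj *pool_get(void)
    {
        PoolObj *obj = tls_pool;

        if (obj) {
            tls_pool = obj->next;   /* reuse from this thread's free list */
            tls_pool_size--;
        } else {
            obj = calloc(1, sizeof(*obj));
        }
        return obj;
    }

    static void pool_put(PoolObj *obj)
    {
        if (tls_pool_size < POOL_BATCH_SIZE) {
            obj->next = tls_pool;   /* cheap recycling, no locking needed */
            tls_pool = obj;
            tls_pool_size++;
        } else {
            free(obj);
        }
    }

    int main(void)
    {
        PoolObj *a = pool_get();
        pool_put(a);                /* back onto this thread's free list */
        PoolObj *b = pool_get();    /* reuses the same object, no malloc */
        pool_put(b);
        return 0;
    }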