From b168573dd9ac5eeba9270834114f6a9b77807ad4 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Tue, 18 Jun 2024 11:53:50 -0700 Subject: [PATCH 01/11] Fix program counter reported for Arm32 instruction tracing It was logging the next address rather than the current one so was always off by 4. --- target/arm/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index 2ca6c847d8..faed6ead36 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -9110,7 +9110,7 @@ static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) #if defined(CONFIG_TCG_LOG_INSTR) if (unlikely(dcbase->log_instr_enabled)) { - TCGv pc = tcg_const_tl(dcbase->pc_next); + TCGv pc = tcg_const_tl(dc->pc_curr); gen_helper_arm_log_instr(cpu_env, pc, tcg_constant_i32(insn)); tcg_temp_free(pc); } From 90d7e31365e81f0782ebb964d83282fff2d77fd1 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 31 May 2024 15:50:41 -0700 Subject: [PATCH 02/11] Log changes to most Arm32 registers This does not include a full audit of all writes (helpers that directly modify registers are not included), but at least handles the common case where store_reg() is called. --- target/arm/cpu.h | 14 ++++++++++++-- target/arm/op_helper.c | 1 + target/arm/translate-a64.c | 6 +++--- target/arm/translate.c | 14 ++++++++++++-- 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index d28a934133..4bbc3b6ede 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -244,6 +244,11 @@ typedef uint64_t AARCH_REG_TYPE; #define N_BANK_WITH_RESTRICTED 4 #endif +extern const char * const arm32_regnames[16]; +#ifdef TARGET_AARCH64 +extern const char * const arm64_regnames[32]; +#endif + typedef struct CPUARMState { /* Regs for current mode. */ uint32_t regs[16]; @@ -3574,6 +3579,7 @@ typedef CPUARMState CPUArchState; typedef ARMCPU ArchCPU; #include "exec/cpu-all.h" +#include "exec/log_instr.h" #include "cpu_cheri.h" #include "cheri-lazy-capregs.h" @@ -3594,11 +3600,15 @@ static inline void arm_set_xreg(CPUARMState *env, int regnum, #ifdef TARGET_CHERI update_capreg_to_intval(env, regnum, value); #else +#ifdef TARGET_AARCH64 if (is_a64(env)) { env->xregs[regnum] = value; - } else { - env->regs[regnum] = value; + qemu_log_instr_reg(env, arm64_regnames[regnum], value); + return; } +#endif + env->regs[regnum] = value; + qemu_log_instr_reg(env, arm32_regnames[regnum], value); #endif } diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index 99452d7a3b..0967bc6401 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -453,6 +453,7 @@ void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val) env->usr_regs[regno - 8] = val; } else { env->regs[regno] = val; + qemu_log_instr_reg(env, arm32_regnames[regno], val); } } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index dbc8ace042..2982700c32 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -123,7 +123,7 @@ static inline bool get_sctlr_sa(DisasContext *ctx) /* Load/store exclusive handling */ static TCGv_i64 cpu_exclusive_high; -static const char *regnames[] = { +const char * const arm64_regnames[32] = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", @@ -169,11 +169,11 @@ void a64_translate_init(void) _cpu_cursors_do_not_access_directly[i] = tcg_global_mem_new( cpu_env, offsetof(CPUARMState, gpcapregs.decompressed[i].cap._cr_cursor), - regnames[i]); + arm64_regnames[i]); #else cpu_X[i] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUARMState, xregs[i]), - regnames[i]); + arm64_regnames[i]); #endif } diff --git a/target/arm/translate.c b/target/arm/translate.c index faed6ead36..ba2118442d 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -98,7 +98,7 @@ TCGv_i64 cpu_exclusive_val; #include "exec/gen-icount.h" -static const char * const regnames[] = +const char * const arm32_regnames[16] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" }; @@ -116,7 +116,7 @@ void arm_translate_init(void) for (i = 0; i < 16; i++) { cpu_R[i] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, regs[i]), - regnames[i]); + arm32_regnames[i]); } cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF"); cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF"); @@ -315,6 +315,16 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var) s->base.is_jmp = DISAS_JUMP; } tcg_gen_mov_i32(cpu_R[reg], var); +#ifdef CONFIG_TCG_LOG_INSTR + if (qemu_ctx_logging_enabled(s)) { + TCGv_ptr name = tcg_const_ptr(arm32_regnames[reg]); + TCGv new_val = tcg_temp_new(); + tcg_gen_extu_i32_tl(new_val, var); + gen_helper_qemu_log_instr_reg(cpu_env, name, new_val); + tcg_temp_free(new_val); + tcg_temp_free_ptr(name); + } +#endif tcg_temp_free_i32(var); } From c785485820b381145b41e35b4f422064646b5e0a Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Wed, 17 Jul 2024 18:48:06 +0100 Subject: [PATCH 03/11] Jenkinsfile: Pass unencoded branch name to cheribsdInfo --- Jenkinsfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index eb33f5d27a..a6be3668ad 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -78,7 +78,7 @@ def bootCheriBSDForAllArchitectures(params, String qemuConfig, boolean isDebug) addBootJobs(bootJobs, params, qemuConfig, architecture, "main") if (!isDebug) { // For the non-ASAN build of QEMU we also boot the latest release - addBootJobs(bootJobs, params, qemuConfig, architecture, "releng%252F22.12", "-latest-release") + addBootJobs(bootJobs, params, qemuConfig, architecture, "releng/22.12", "-latest-release") } def targetBranch = env.CHANGE_TARGET ? env.CHANGE_TARGET : env.BRANCH_NAME; if (targetBranch == 'dev') { @@ -106,7 +106,8 @@ def bootCheriBSD(params, String qemuConfig, String stageSuffix, String archSuffi def compressedDiskImage = "artifacts-${archSuffix}/cheribsd-${archSuffix}.img.xz" dir (stageSuffix) { sh "rm -rfv artifacts-${archSuffix}/cheribsd-*.img* artifacts-${archSuffix}/kernel*" - copyArtifacts projectName: "CheriBSD-pipeline/${cheribsdBranch}", filter: "${compressedDiskImage}, ${compressedKernel}", + def jenkinsBranchName = cheribsdBranch.replaceAll('/', '%2F') + copyArtifacts projectName: "CheriBSD-pipeline/${jenkinsBranchName}", filter: "${compressedDiskImage}, ${compressedKernel}", target: '.', fingerprintArtifacts: false, flatten: false, selector: lastSuccessful() } def testExtraArgs = [ From f1098ba7ac0dfcf7ee7fa1a01953d68a68232190 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Wed, 17 Jul 2024 17:25:43 +0100 Subject: [PATCH 04/11] Jenkinsfile: Get latest release's version from cheribsdInfo This bumps it to 23.11 and will automatically pick up 24.05 soon. --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index a6be3668ad..f591e796e3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -78,7 +78,8 @@ def bootCheriBSDForAllArchitectures(params, String qemuConfig, boolean isDebug) addBootJobs(bootJobs, params, qemuConfig, architecture, "main") if (!isDebug) { // For the non-ASAN build of QEMU we also boot the latest release - addBootJobs(bootJobs, params, qemuConfig, architecture, "releng/22.12", "-latest-release") + def latestRelease = cheribsdInfo.getReleasedVersions()[-1] + addBootJobs(bootJobs, params, qemuConfig, architecture, "releng/${latestRelease}", "-latest-release") } def targetBranch = env.CHANGE_TARGET ? env.CHANGE_TARGET : env.BRANCH_NAME; if (targetBranch == 'dev') { From 4e7686c6728daf62fa9b95738a409c037ddc16eb Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Tue, 18 Jun 2024 14:16:04 -0700 Subject: [PATCH 05/11] Add Thumb instruction tracing for Arm32 --- target/arm/helper.c | 11 +++++++++-- target/arm/helper.h | 2 +- target/arm/translate-a64.c | 7 ++----- target/arm/translate.c | 15 ++++++++++++--- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index b94a8b1ca6..1a9c00370c 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -14033,11 +14033,18 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, #endif #ifdef CONFIG_TCG_LOG_INSTR -void HELPER(arm_log_instr)(CPUARMState *env, target_ulong pc, uint32_t opcode) +void HELPER(arm_log_instr)(CPUARMState *env, uint64_t pc, uint32_t opcode, + uint32_t opcode_size) { if (qemu_log_instr_enabled(env)) { qemu_log_instr_asid(env, cpu_get_asid(env, pc)); - qemu_log_instr(env, pc, (char *)&opcode, sizeof(opcode)); + if (opcode_size == 2) { + uint16_t opcode16 = opcode; + qemu_log_instr(env, pc, (char *)&opcode16, opcode_size); + } else { + tcg_debug_assert(opcode_size == 4); + qemu_log_instr(env, pc, (char *)&opcode, opcode_size); + } } } #endif diff --git a/target/arm/helper.h b/target/arm/helper.h index ac5e8438fc..2fee3bd47a 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -938,7 +938,7 @@ DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, #endif #ifdef CONFIG_TCG_LOG_INSTR -DEF_HELPER_FLAGS_3(arm_log_instr, TCG_CALL_NO_WG, void, env, tl, i32) +DEF_HELPER_FLAGS_4(arm_log_instr, TCG_CALL_NO_WG, void, env, i64, i32, i32) #endif #ifdef TARGET_CHERI diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 2982700c32..37589355b1 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -15304,11 +15304,8 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) #if defined(CONFIG_TCG_LOG_INSTR) if (unlikely(s->base.log_instr_enabled)) { - TCGv pc = tcg_const_tl(s->base.pc_next); - TCGv_i32 opc = tcg_const_i32(insn); - gen_helper_arm_log_instr(cpu_env, pc, opc); - tcg_temp_free(pc); - tcg_temp_free_i32(opc); + gen_helper_arm_log_instr(cpu_env, tcg_constant_i64(s->pc_curr), + tcg_constant_i32(insn), tcg_constant_i32(4)); } #endif diff --git a/target/arm/translate.c b/target/arm/translate.c index ba2118442d..728a77473c 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -9120,9 +9120,8 @@ static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) #if defined(CONFIG_TCG_LOG_INSTR) if (unlikely(dcbase->log_instr_enabled)) { - TCGv pc = tcg_const_tl(dc->pc_curr); - gen_helper_arm_log_instr(cpu_env, pc, tcg_constant_i32(insn)); - tcg_temp_free(pc); + gen_helper_arm_log_instr(cpu_env, tcg_constant_i64(dc->pc_curr), + tcg_constant_i32(insn), tcg_constant_i32(4)); } #endif @@ -9203,6 +9202,16 @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) } dc->insn = insn; +#if defined(CONFIG_TCG_LOG_INSTR) + if (unlikely(dcbase->log_instr_enabled)) { + /* For Thumb we have to undo the 16-bit swap above for disassembly. */ + gen_helper_arm_log_instr( + cpu_env, tcg_constant_i64(dc->pc_curr), + tcg_constant_i32(is_16bit ? insn : rol32(insn, 16)), + tcg_constant_i32(is_16bit ? 2 : 4)); + } +#endif + if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) { uint32_t cond = dc->condexec_cond; From 833dc7233a370530f5558df67e09d8326769cb51 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 22 Feb 2022 14:01:47 +0000 Subject: [PATCH 06/11] tls: add macros for coroutine-safe TLS variables Compiler optimizations can cache TLS values across coroutine yield points, resulting in stale values from the previous thread when a coroutine is re-entered by a new thread. Serge Guelton developed an __attribute__((noinline)) wrapper and tested it with clang and gcc. I formatted his idea according to QEMU's coding style and wrote documentation. The compiler can still optimize based on analyzing noinline code, so an asm volatile barrier with an output constraint is required to prevent unwanted optimizations. Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1952483 Suggested-by: Serge Guelton Signed-off-by: Stefan Hajnoczi Message-Id: <20220222140150.27240-2-stefanha@redhat.com> Signed-off-by: Kevin Wolf (cherry picked from commit 7d29c341c9d402cf0bcb3a3b76fce0c09dd24e94) --- include/qemu/coroutine-tls.h | 165 +++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 include/qemu/coroutine-tls.h diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h new file mode 100644 index 0000000000..1558a826aa --- /dev/null +++ b/include/qemu/coroutine-tls.h @@ -0,0 +1,165 @@ +/* + * QEMU Thread Local Storage for coroutines + * + * Copyright Red Hat + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + * It is forbidden to access Thread Local Storage in coroutines because + * compiler optimizations may cause values to be cached across coroutine + * re-entry. Coroutines can run in more than one thread through the course of + * their life, leading bugs when stale TLS values from the wrong thread are + * used as a result of compiler optimization. + * + * An example is: + * + * ..code-block:: c + * :caption: A coroutine that may see the wrong TLS value + * + * static __thread AioContext *current_aio_context; + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(current_aio_context); + * qemu_coroutine_yield(); + * aio_notify(current_aio_context); // <-- may be stale after yielding! + * } + * + * This header provides macros for safely defining variables in Thread Local + * Storage: + * + * ..code-block:: c + * :caption: A coroutine that safely uses TLS + * + * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context) + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(get_current_aio_context()); + * qemu_coroutine_yield(); + * aio_notify(get_current_aio_context()); // <-- safe + * } + */ + +#ifndef QEMU_COROUTINE_TLS_H +#define QEMU_COROUTINE_TLS_H + +/* + * To stop the compiler from caching TLS values we define accessor functions + * with __attribute__((noinline)) plus asm volatile("") to prevent + * optimizations that override noinline. + * + * The compiler can still analyze noinline code and make optimizations based on + * that knowledge, so an inline asm output operand is used to prevent + * optimizations that make assumptions about the address of the TLS variable. + * + * This is fragile and ultimately needs to be solved by a mechanism that is + * guaranteed to work by the compiler (e.g. stackless coroutines), but for now + * we use this approach to prevent issues. + */ + +/** + * QEMU_DECLARE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Declare an extern variable in Thread Local Storage from a header file: + * + * .. code-block:: c + * :caption: Declaring an extern variable in Thread Local Storage + * + * QEMU_DECLARE_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Declaring a TLS variable using __thread + * + * extern __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DECLARE_CO_TLS(type, var) \ + __attribute__((noinline)) type get_##var(void); \ + __attribute__((noinline)) void set_##var(type v); \ + __attribute__((noinline)) type *get_ptr_##var(void); + +/** + * QEMU_DEFINE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a variable in Thread Local Storage that was previously declared from + * a header file with QEMU_DECLARE_CO_TLS(): + * + * .. code-block:: c + * :caption: Defining a variable in Thread Local Storage + * + * QEMU_DEFINE_CO_TLS(int, my_count) + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a TLS variable using __thread + * + * __thread int my_count; + */ +#define QEMU_DEFINE_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + type get_##var(void) { asm volatile(""); return co_tls_##var; } \ + void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +/** + * QEMU_DEFINE_STATIC_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a static variable in Thread Local Storage: + * + * .. code-block:: c + * :caption: Defining a static variable in Thread Local Storage + * + * QEMU_DEFINE_STATIC_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a static TLS variable using __thread + * + * static __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + static __attribute__((noinline, unused)) \ + type get_##var(void) \ + { asm volatile(""); return co_tls_##var; } \ + static __attribute__((noinline, unused)) \ + void set_##var(type v) \ + { asm volatile(""); co_tls_##var = v; } \ + static __attribute__((noinline, unused)) \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +#endif /* QEMU_COROUTINE_TLS_H */ From a3dabe57691ab38c7ccee2b76dc6a935253fa81c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 22 Feb 2022 14:01:49 +0000 Subject: [PATCH 07/11] rcu: use coroutine TLS macros RCU may be used from coroutines. Standard __thread variables cannot be used by coroutines. Use the coroutine TLS macros instead. Signed-off-by: Stefan Hajnoczi Message-Id: <20220222140150.27240-4-stefanha@redhat.com> Signed-off-by: Kevin Wolf (cherry picked from commit 17c78154b0ba2237c37f3e4a95140b754cb6ac8b) --- include/qemu/rcu.h | 7 ++++--- tests/unit/rcutorture.c | 10 +++++----- tests/unit/test-rcu-list.c | 4 ++-- util/rcu.c | 8 ++++---- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 515d327cf1..15b7bc2d67 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -28,6 +28,7 @@ #include "qemu/queue.h" #include "qemu/atomic.h" #include "qemu/sys_membarrier.h" +#include "qemu/coroutine-tls.h" #ifdef __cplusplus extern "C" { @@ -68,11 +69,11 @@ struct rcu_reader_data { QLIST_ENTRY(rcu_reader_data) node; }; -extern __thread struct rcu_reader_data rcu_reader; +QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader) static inline void rcu_read_lock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); unsigned ctr; if (p_rcu_reader->depth++ > 0) { @@ -88,7 +89,7 @@ static inline void rcu_read_lock(void) static inline void rcu_read_unlock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); assert(p_rcu_reader->depth != 0); if (--p_rcu_reader->depth > 0) { diff --git a/tests/unit/rcutorture.c b/tests/unit/rcutorture.c index de6f649058..495a4e6f42 100644 --- a/tests/unit/rcutorture.c +++ b/tests/unit/rcutorture.c @@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); } @@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg) struct rcu_stress *cp = qatomic_read(&rcu_stress_current); rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg) { rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); } diff --git a/tests/unit/test-rcu-list.c b/tests/unit/test-rcu-list.c index 49641e1936..64b81ae058 100644 --- a/tests/unit/test-rcu-list.c +++ b/tests/unit/test-rcu-list.c @@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (qatomic_read(&goflag) == GOFLAG_INIT) { g_usleep(1000); @@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg) long long n_removed_local = 0; struct list_element *el, *prev_el; - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (qatomic_read(&goflag) == GOFLAG_INIT) { g_usleep(1000); diff --git a/util/rcu.c b/util/rcu.c index 13ac0f75cb..40062b8ed2 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -64,7 +64,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr) /* Written to only by each individual reader. Read by both the reader and the * writers. */ -__thread struct rcu_reader_data rcu_reader; +QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader) /* Protected by rcu_registry_lock. */ typedef QLIST_HEAD(, rcu_reader_data) ThreadList; @@ -350,16 +350,16 @@ void drain_call_rcu(void) void rcu_register_thread(void) { - assert(rcu_reader.ctr == 0); + assert(get_ptr_rcu_reader()->ctr == 0); qemu_mutex_lock(&rcu_registry_lock); - QLIST_INSERT_HEAD(®istry, &rcu_reader, node); + QLIST_INSERT_HEAD(®istry, get_ptr_rcu_reader(), node); qemu_mutex_unlock(&rcu_registry_lock); } void rcu_unregister_thread(void) { qemu_mutex_lock(&rcu_registry_lock); - QLIST_REMOVE(&rcu_reader, node); + QLIST_REMOVE(get_ptr_rcu_reader(), node); qemu_mutex_unlock(&rcu_registry_lock); } From 0d106190031539cb6cfdc5d96202e942f04948c1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 22 Feb 2022 14:01:50 +0000 Subject: [PATCH 08/11] cpus: use coroutine TLS macros for iothread_locked qemu_mutex_iothread_locked() may be used from coroutines. Standard __thread variables cannot be used by coroutines. Use the coroutine TLS macros instead. Signed-off-by: Stefan Hajnoczi Message-Id: <20220222140150.27240-5-stefanha@redhat.com> Signed-off-by: Kevin Wolf (cherry picked from commit d5d2b15ecf62c662985983ca065ddeeec48fd248) --- softmmu/cpus.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/softmmu/cpus.c b/softmmu/cpus.c index a7ee431187..581b7d73da 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "monitor/monitor.h" +#include "qemu/coroutine-tls.h" #include "qapi/error.h" #include "qapi/qapi-commands-machine.h" #include "qapi/qapi-commands-misc.h" @@ -472,11 +473,11 @@ bool qemu_in_vcpu_thread(void) return current_cpu && qemu_cpu_is_self(current_cpu); } -static __thread bool iothread_locked = false; +QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) bool qemu_mutex_iothread_locked(void) { - return iothread_locked; + return get_iothread_locked(); } /* @@ -489,13 +490,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line) g_assert(!qemu_mutex_iothread_locked()); bql_lock(&qemu_global_mutex, file, line); - iothread_locked = true; + set_iothread_locked(true); } void qemu_mutex_unlock_iothread(void) { g_assert(qemu_mutex_iothread_locked()); - iothread_locked = false; + set_iothread_locked(false); qemu_mutex_unlock(&qemu_global_mutex); } From 034b9d30eb31a6e661d2d530e0431df023de7068 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 7 Mar 2022 15:38:51 +0000 Subject: [PATCH 09/11] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread-Local Storage variables cannot be used directly from coroutine code because the compiler may optimize TLS variable accesses across qemu_coroutine_yield() calls. When the coroutine is re-entered from another thread the TLS variables from the old thread must no longer be used. Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. Signed-off-by: Stefan Hajnoczi Message-Id: <20220307153853.602859-2-stefanha@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf (cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407) --- util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 904b375192..127d5a13c8 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include #include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" #ifdef CONFIG_VALGRIND_H #include @@ -66,8 +67,8 @@ typedef struct { /** * Per-thread coroutine bookkeeping */ -static __thread CoroutineUContext leader; -static __thread Coroutine *current; +QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); +QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader); /* * va_args to makecontext() must be type 'int', so passing @@ -97,14 +98,15 @@ static inline __attribute__((always_inline)) void finish_switch_fiber(void *fake_stack_save) { #ifdef CONFIG_ASAN + CoroutineUContext *leaderp = get_ptr_leader(); const void *bottom_old; size_t size_old; __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); - if (!leader.stack) { - leader.stack = (void *)bottom_old; - leader.stack_size = size_old; + if (!leaderp->stack) { + leaderp->stack = (void *)bottom_old; + leaderp->stack_size = size_old; } #endif #ifdef CONFIG_TSAN @@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1) /* Initialize longjmp environment and switch back the caller */ if (!sigsetjmp(self->env, 0)) { - start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack, - leader.stack_size); + CoroutineUContext *leaderp = get_ptr_leader(); + + start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, + leaderp->stack, leaderp->stack_size); start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); } @@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, int ret; void *fake_stack_save = NULL; - current = to_; + set_current(to_); ret = sigsetjmp(from->env, 0); if (ret == 0) { @@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, Coroutine *qemu_coroutine_self(void) { - if (!current) { - current = &leader.base; + Coroutine *self = get_current(); + CoroutineUContext *leaderp = get_ptr_leader(); + + if (!self) { + self = &leaderp->base; + set_current(self); } #ifdef CONFIG_TSAN - if (!leader.tsan_co_fiber) { - leader.tsan_co_fiber = __tsan_get_current_fiber(); + if (!leaderp->tsan_co_fiber) { + leaderp->tsan_co_fiber = __tsan_get_current_fiber(); } #endif - return current; + return self; } bool qemu_in_coroutine(void) { - return current && current->caller; + Coroutine *self = get_current(); + + return self && self->caller; } From d799e4934a351c0d30aa6571bd697d4d56e9bb57 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 7 Mar 2022 15:38:52 +0000 Subject: [PATCH 10/11] coroutine: use QEMU_DEFINE_STATIC_CO_TLS() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread-Local Storage variables cannot be used directly from coroutine code because the compiler may optimize TLS variable accesses across qemu_coroutine_yield() calls. When the coroutine is re-entered from another thread the TLS variables from the old thread must no longer be used. Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. The alloc_pool QSLIST needs a typedef so the return value of get_ptr_alloc_pool() can be stored in a local variable. One example of why this code is necessary: a coroutine that yields before calling qemu_coroutine_create() to create another coroutine is affected by the TLS issue. Signed-off-by: Stefan Hajnoczi Message-Id: <20220307153853.602859-3-stefanha@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf (cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90) --- util/qemu-coroutine.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index 38fb6d3084..22071dac4b 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -18,6 +18,7 @@ #include "qemu/atomic.h" #include "qemu/coroutine.h" #include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" #include "block/aio.h" enum { @@ -27,17 +28,20 @@ enum { /** Free list to speed up creation */ static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); static unsigned int release_pool_size; -static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); -static __thread unsigned int alloc_pool_size; -static __thread Notifier coroutine_pool_cleanup_notifier; + +typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; +QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); +QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); +QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); static void coroutine_pool_cleanup(Notifier *n, void *value) { Coroutine *co; Coroutine *tmp; + CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); - QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { - QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { + QSLIST_REMOVE_HEAD(alloc_pool, pool_next); qemu_coroutine_delete(co); } } @@ -47,27 +51,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) Coroutine *co = NULL; if (CONFIG_COROUTINE_POOL) { - co = QSLIST_FIRST(&alloc_pool); + CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); + + co = QSLIST_FIRST(alloc_pool); if (!co) { if (release_pool_size > POOL_BATCH_SIZE) { /* Slow path; a good place to register the destructor, too. */ - if (!coroutine_pool_cleanup_notifier.notify) { - coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; - qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); + Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); + if (!notifier->notify) { + notifier->notify = coroutine_pool_cleanup; + qemu_thread_atexit_add(notifier); } /* This is not exact; there could be a little skew between * release_pool_size and the actual size of release_pool. But * it is just a heuristic, it does not need to be perfect. */ - alloc_pool_size = qatomic_xchg(&release_pool_size, 0); - QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); - co = QSLIST_FIRST(&alloc_pool); + set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); + QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); + co = QSLIST_FIRST(alloc_pool); } } if (co) { - QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); - alloc_pool_size--; + QSLIST_REMOVE_HEAD(alloc_pool, pool_next); + set_alloc_pool_size(get_alloc_pool_size() - 1); } } @@ -91,9 +98,9 @@ static void coroutine_delete(Coroutine *co) qatomic_inc(&release_pool_size); return; } - if (alloc_pool_size < POOL_BATCH_SIZE) { - QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); - alloc_pool_size++; + if (get_alloc_pool_size() < POOL_BATCH_SIZE) { + QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); + set_alloc_pool_size(get_alloc_pool_size() + 1); return; } } From 6846c336ce5b5b2db348f7452417b04e06b268e5 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 7 Mar 2022 15:38:53 +0000 Subject: [PATCH 11/11] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread-Local Storage variables cannot be used directly from coroutine code because the compiler may optimize TLS variable accesses across qemu_coroutine_yield() calls. When the coroutine is re-entered from another thread the TLS variables from the old thread must no longer be used. Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. I think coroutine-win32.c could get away with __thread because the variables are only used in situations where either the stale value is correct (current) or outside coroutine context (loading leader when current is NULL). Due to the difficulty of being sure that this is really safe in all scenarios it seems worth converting it anyway. Signed-off-by: Stefan Hajnoczi Message-Id: <20220307153853.602859-4-stefanha@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf (cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc) --- util/coroutine-win32.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c index de6bd4fd3e..c02a62c896 100644 --- a/util/coroutine-win32.c +++ b/util/coroutine-win32.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" typedef struct { @@ -34,8 +35,8 @@ typedef struct CoroutineAction action; } CoroutineWin32; -static __thread CoroutineWin32 leader; -static __thread Coroutine *current; +QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader); +QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); /* This function is marked noinline to prevent GCC from inlining it * into coroutine_trampoline(). If we allow it to do that then it @@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); - current = to_; + set_current(to_); to->action = action; SwitchToFiber(to->fiber); @@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_) Coroutine *qemu_coroutine_self(void) { + Coroutine *current = get_current(); + if (!current) { - current = &leader.base; - leader.fiber = ConvertThreadToFiber(NULL); + CoroutineWin32 *leader = get_ptr_leader(); + + current = &leader->base; + set_current(current); + leader->fiber = ConvertThreadToFiber(NULL); } return current; } bool qemu_in_coroutine(void) { + Coroutine *current = get_current(); + return current && current->caller; }