From 209bfd13432a107989ec4884ef56ff30467c10eb Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 22 Feb 2022 14:01:47 +0000 Subject: [PATCH 1/6] tls: add macros for coroutine-safe TLS variables Compiler optimizations can cache TLS values across coroutine yield points, resulting in stale values from the previous thread when a coroutine is re-entered by a new thread. Serge Guelton developed an __attribute__((noinline)) wrapper and tested it with clang and gcc. I formatted his idea according to QEMU's coding style and wrote documentation. The compiler can still optimize based on analyzing noinline code, so an asm volatile barrier with an output constraint is required to prevent unwanted optimizations. Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1952483 Suggested-by: Serge Guelton Signed-off-by: Stefan Hajnoczi Message-Id: <20220222140150.27240-2-stefanha@redhat.com> Signed-off-by: Kevin Wolf (cherry picked from commit 7d29c341c9d402cf0bcb3a3b76fce0c09dd24e94) --- include/qemu/coroutine-tls.h | 165 +++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 include/qemu/coroutine-tls.h diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h new file mode 100644 index 0000000000..1558a826aa --- /dev/null +++ b/include/qemu/coroutine-tls.h @@ -0,0 +1,165 @@ +/* + * QEMU Thread Local Storage for coroutines + * + * Copyright Red Hat + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + * It is forbidden to access Thread Local Storage in coroutines because + * compiler optimizations may cause values to be cached across coroutine + * re-entry. Coroutines can run in more than one thread through the course of + * their life, leading bugs when stale TLS values from the wrong thread are + * used as a result of compiler optimization. + * + * An example is: + * + * ..code-block:: c + * :caption: A coroutine that may see the wrong TLS value + * + * static __thread AioContext *current_aio_context; + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(current_aio_context); + * qemu_coroutine_yield(); + * aio_notify(current_aio_context); // <-- may be stale after yielding! + * } + * + * This header provides macros for safely defining variables in Thread Local + * Storage: + * + * ..code-block:: c + * :caption: A coroutine that safely uses TLS + * + * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context) + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(get_current_aio_context()); + * qemu_coroutine_yield(); + * aio_notify(get_current_aio_context()); // <-- safe + * } + */ + +#ifndef QEMU_COROUTINE_TLS_H +#define QEMU_COROUTINE_TLS_H + +/* + * To stop the compiler from caching TLS values we define accessor functions + * with __attribute__((noinline)) plus asm volatile("") to prevent + * optimizations that override noinline. + * + * The compiler can still analyze noinline code and make optimizations based on + * that knowledge, so an inline asm output operand is used to prevent + * optimizations that make assumptions about the address of the TLS variable. + * + * This is fragile and ultimately needs to be solved by a mechanism that is + * guaranteed to work by the compiler (e.g. stackless coroutines), but for now + * we use this approach to prevent issues. + */ + +/** + * QEMU_DECLARE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Declare an extern variable in Thread Local Storage from a header file: + * + * .. code-block:: c + * :caption: Declaring an extern variable in Thread Local Storage + * + * QEMU_DECLARE_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Declaring a TLS variable using __thread + * + * extern __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DECLARE_CO_TLS(type, var) \ + __attribute__((noinline)) type get_##var(void); \ + __attribute__((noinline)) void set_##var(type v); \ + __attribute__((noinline)) type *get_ptr_##var(void); + +/** + * QEMU_DEFINE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a variable in Thread Local Storage that was previously declared from + * a header file with QEMU_DECLARE_CO_TLS(): + * + * .. code-block:: c + * :caption: Defining a variable in Thread Local Storage + * + * QEMU_DEFINE_CO_TLS(int, my_count) + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a TLS variable using __thread + * + * __thread int my_count; + */ +#define QEMU_DEFINE_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + type get_##var(void) { asm volatile(""); return co_tls_##var; } \ + void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +/** + * QEMU_DEFINE_STATIC_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a static variable in Thread Local Storage: + * + * .. code-block:: c + * :caption: Defining a static variable in Thread Local Storage + * + * QEMU_DEFINE_STATIC_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a static TLS variable using __thread + * + * static __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + static __attribute__((noinline, unused)) \ + type get_##var(void) \ + { asm volatile(""); return co_tls_##var; } \ + static __attribute__((noinline, unused)) \ + void set_##var(type v) \ + { asm volatile(""); co_tls_##var = v; } \ + static __attribute__((noinline, unused)) \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +#endif /* QEMU_COROUTINE_TLS_H */ From 94a652333bcb60292a7d5a4bda4482f1edce5cfa Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 22 Feb 2022 14:01:49 +0000 Subject: [PATCH 2/6] rcu: use coroutine TLS macros RCU may be used from coroutines. Standard __thread variables cannot be used by coroutines. Use the coroutine TLS macros instead. Signed-off-by: Stefan Hajnoczi Message-Id: <20220222140150.27240-4-stefanha@redhat.com> Signed-off-by: Kevin Wolf (cherry picked from commit 17c78154b0ba2237c37f3e4a95140b754cb6ac8b) --- include/qemu/rcu.h | 7 ++++--- tests/unit/rcutorture.c | 10 +++++----- tests/unit/test-rcu-list.c | 4 ++-- util/rcu.c | 8 ++++---- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 515d327cf1..15b7bc2d67 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -28,6 +28,7 @@ #include "qemu/queue.h" #include "qemu/atomic.h" #include "qemu/sys_membarrier.h" +#include "qemu/coroutine-tls.h" #ifdef __cplusplus extern "C" { @@ -68,11 +69,11 @@ struct rcu_reader_data { QLIST_ENTRY(rcu_reader_data) node; }; -extern __thread struct rcu_reader_data rcu_reader; +QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader) static inline void rcu_read_lock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); unsigned ctr; if (p_rcu_reader->depth++ > 0) { @@ -88,7 +89,7 @@ static inline void rcu_read_lock(void) static inline void rcu_read_unlock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); assert(p_rcu_reader->depth != 0); if (--p_rcu_reader->depth > 0) { diff --git a/tests/unit/rcutorture.c b/tests/unit/rcutorture.c index de6f649058..495a4e6f42 100644 --- a/tests/unit/rcutorture.c +++ b/tests/unit/rcutorture.c @@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); } @@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg) struct rcu_stress *cp = qatomic_read(&rcu_stress_current); rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg) { rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); } diff --git a/tests/unit/test-rcu-list.c b/tests/unit/test-rcu-list.c index 49641e1936..64b81ae058 100644 --- a/tests/unit/test-rcu-list.c +++ b/tests/unit/test-rcu-list.c @@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (qatomic_read(&goflag) == GOFLAG_INIT) { g_usleep(1000); @@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg) long long n_removed_local = 0; struct list_element *el, *prev_el; - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (qatomic_read(&goflag) == GOFLAG_INIT) { g_usleep(1000); diff --git a/util/rcu.c b/util/rcu.c index 13ac0f75cb..40062b8ed2 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -64,7 +64,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr) /* Written to only by each individual reader. Read by both the reader and the * writers. */ -__thread struct rcu_reader_data rcu_reader; +QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader) /* Protected by rcu_registry_lock. */ typedef QLIST_HEAD(, rcu_reader_data) ThreadList; @@ -350,16 +350,16 @@ void drain_call_rcu(void) void rcu_register_thread(void) { - assert(rcu_reader.ctr == 0); + assert(get_ptr_rcu_reader()->ctr == 0); qemu_mutex_lock(&rcu_registry_lock); - QLIST_INSERT_HEAD(®istry, &rcu_reader, node); + QLIST_INSERT_HEAD(®istry, get_ptr_rcu_reader(), node); qemu_mutex_unlock(&rcu_registry_lock); } void rcu_unregister_thread(void) { qemu_mutex_lock(&rcu_registry_lock); - QLIST_REMOVE(&rcu_reader, node); + QLIST_REMOVE(get_ptr_rcu_reader(), node); qemu_mutex_unlock(&rcu_registry_lock); } From 16be066ff798ea82a18f99510b50920309f887b3 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 22 Feb 2022 14:01:50 +0000 Subject: [PATCH 3/6] cpus: use coroutine TLS macros for iothread_locked qemu_mutex_iothread_locked() may be used from coroutines. Standard __thread variables cannot be used by coroutines. Use the coroutine TLS macros instead. Signed-off-by: Stefan Hajnoczi Message-Id: <20220222140150.27240-5-stefanha@redhat.com> Signed-off-by: Kevin Wolf (cherry picked from commit d5d2b15ecf62c662985983ca065ddeeec48fd248) --- softmmu/cpus.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/softmmu/cpus.c b/softmmu/cpus.c index a7ee431187..581b7d73da 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "monitor/monitor.h" +#include "qemu/coroutine-tls.h" #include "qapi/error.h" #include "qapi/qapi-commands-machine.h" #include "qapi/qapi-commands-misc.h" @@ -472,11 +473,11 @@ bool qemu_in_vcpu_thread(void) return current_cpu && qemu_cpu_is_self(current_cpu); } -static __thread bool iothread_locked = false; +QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) bool qemu_mutex_iothread_locked(void) { - return iothread_locked; + return get_iothread_locked(); } /* @@ -489,13 +490,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line) g_assert(!qemu_mutex_iothread_locked()); bql_lock(&qemu_global_mutex, file, line); - iothread_locked = true; + set_iothread_locked(true); } void qemu_mutex_unlock_iothread(void) { g_assert(qemu_mutex_iothread_locked()); - iothread_locked = false; + set_iothread_locked(false); qemu_mutex_unlock(&qemu_global_mutex); } From 25e5a1d5df3878a9d30ecc33b43fe2a9282c2552 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 7 Mar 2022 15:38:51 +0000 Subject: [PATCH 4/6] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread-Local Storage variables cannot be used directly from coroutine code because the compiler may optimize TLS variable accesses across qemu_coroutine_yield() calls. When the coroutine is re-entered from another thread the TLS variables from the old thread must no longer be used. Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. Signed-off-by: Stefan Hajnoczi Message-Id: <20220307153853.602859-2-stefanha@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf (cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407) --- util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 904b375192..127d5a13c8 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include #include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" #ifdef CONFIG_VALGRIND_H #include @@ -66,8 +67,8 @@ typedef struct { /** * Per-thread coroutine bookkeeping */ -static __thread CoroutineUContext leader; -static __thread Coroutine *current; +QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); +QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader); /* * va_args to makecontext() must be type 'int', so passing @@ -97,14 +98,15 @@ static inline __attribute__((always_inline)) void finish_switch_fiber(void *fake_stack_save) { #ifdef CONFIG_ASAN + CoroutineUContext *leaderp = get_ptr_leader(); const void *bottom_old; size_t size_old; __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); - if (!leader.stack) { - leader.stack = (void *)bottom_old; - leader.stack_size = size_old; + if (!leaderp->stack) { + leaderp->stack = (void *)bottom_old; + leaderp->stack_size = size_old; } #endif #ifdef CONFIG_TSAN @@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1) /* Initialize longjmp environment and switch back the caller */ if (!sigsetjmp(self->env, 0)) { - start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack, - leader.stack_size); + CoroutineUContext *leaderp = get_ptr_leader(); + + start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, + leaderp->stack, leaderp->stack_size); start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); } @@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, int ret; void *fake_stack_save = NULL; - current = to_; + set_current(to_); ret = sigsetjmp(from->env, 0); if (ret == 0) { @@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, Coroutine *qemu_coroutine_self(void) { - if (!current) { - current = &leader.base; + Coroutine *self = get_current(); + CoroutineUContext *leaderp = get_ptr_leader(); + + if (!self) { + self = &leaderp->base; + set_current(self); } #ifdef CONFIG_TSAN - if (!leader.tsan_co_fiber) { - leader.tsan_co_fiber = __tsan_get_current_fiber(); + if (!leaderp->tsan_co_fiber) { + leaderp->tsan_co_fiber = __tsan_get_current_fiber(); } #endif - return current; + return self; } bool qemu_in_coroutine(void) { - return current && current->caller; + Coroutine *self = get_current(); + + return self && self->caller; } From 23b5b088f2f0db1effc2c8ba115e72537f48be1d Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 7 Mar 2022 15:38:52 +0000 Subject: [PATCH 5/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread-Local Storage variables cannot be used directly from coroutine code because the compiler may optimize TLS variable accesses across qemu_coroutine_yield() calls. When the coroutine is re-entered from another thread the TLS variables from the old thread must no longer be used. Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. The alloc_pool QSLIST needs a typedef so the return value of get_ptr_alloc_pool() can be stored in a local variable. One example of why this code is necessary: a coroutine that yields before calling qemu_coroutine_create() to create another coroutine is affected by the TLS issue. Signed-off-by: Stefan Hajnoczi Message-Id: <20220307153853.602859-3-stefanha@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf (cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90) --- util/qemu-coroutine.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index 38fb6d3084..22071dac4b 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -18,6 +18,7 @@ #include "qemu/atomic.h" #include "qemu/coroutine.h" #include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" #include "block/aio.h" enum { @@ -27,17 +28,20 @@ enum { /** Free list to speed up creation */ static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); static unsigned int release_pool_size; -static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); -static __thread unsigned int alloc_pool_size; -static __thread Notifier coroutine_pool_cleanup_notifier; + +typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; +QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); +QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); +QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); static void coroutine_pool_cleanup(Notifier *n, void *value) { Coroutine *co; Coroutine *tmp; + CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); - QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { - QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { + QSLIST_REMOVE_HEAD(alloc_pool, pool_next); qemu_coroutine_delete(co); } } @@ -47,27 +51,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) Coroutine *co = NULL; if (CONFIG_COROUTINE_POOL) { - co = QSLIST_FIRST(&alloc_pool); + CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); + + co = QSLIST_FIRST(alloc_pool); if (!co) { if (release_pool_size > POOL_BATCH_SIZE) { /* Slow path; a good place to register the destructor, too. */ - if (!coroutine_pool_cleanup_notifier.notify) { - coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; - qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); + Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); + if (!notifier->notify) { + notifier->notify = coroutine_pool_cleanup; + qemu_thread_atexit_add(notifier); } /* This is not exact; there could be a little skew between * release_pool_size and the actual size of release_pool. But * it is just a heuristic, it does not need to be perfect. */ - alloc_pool_size = qatomic_xchg(&release_pool_size, 0); - QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); - co = QSLIST_FIRST(&alloc_pool); + set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); + QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); + co = QSLIST_FIRST(alloc_pool); } } if (co) { - QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); - alloc_pool_size--; + QSLIST_REMOVE_HEAD(alloc_pool, pool_next); + set_alloc_pool_size(get_alloc_pool_size() - 1); } } @@ -91,9 +98,9 @@ static void coroutine_delete(Coroutine *co) qatomic_inc(&release_pool_size); return; } - if (alloc_pool_size < POOL_BATCH_SIZE) { - QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); - alloc_pool_size++; + if (get_alloc_pool_size() < POOL_BATCH_SIZE) { + QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); + set_alloc_pool_size(get_alloc_pool_size() + 1); return; } } From c5bb9b8bb891c955420f1e31023d058738b9546d Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 7 Mar 2022 15:38:53 +0000 Subject: [PATCH 6/6] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread-Local Storage variables cannot be used directly from coroutine code because the compiler may optimize TLS variable accesses across qemu_coroutine_yield() calls. When the coroutine is re-entered from another thread the TLS variables from the old thread must no longer be used. Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. I think coroutine-win32.c could get away with __thread because the variables are only used in situations where either the stale value is correct (current) or outside coroutine context (loading leader when current is NULL). Due to the difficulty of being sure that this is really safe in all scenarios it seems worth converting it anyway. Signed-off-by: Stefan Hajnoczi Message-Id: <20220307153853.602859-4-stefanha@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf (cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc) --- util/coroutine-win32.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c index de6bd4fd3e..c02a62c896 100644 --- a/util/coroutine-win32.c +++ b/util/coroutine-win32.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" typedef struct { @@ -34,8 +35,8 @@ typedef struct CoroutineAction action; } CoroutineWin32; -static __thread CoroutineWin32 leader; -static __thread Coroutine *current; +QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader); +QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); /* This function is marked noinline to prevent GCC from inlining it * into coroutine_trampoline(). If we allow it to do that then it @@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); - current = to_; + set_current(to_); to->action = action; SwitchToFiber(to->fiber); @@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_) Coroutine *qemu_coroutine_self(void) { + Coroutine *current = get_current(); + if (!current) { - current = &leader.base; - leader.fiber = ConvertThreadToFiber(NULL); + CoroutineWin32 *leader = get_ptr_leader(); + + current = &leader->base; + set_current(current); + leader->fiber = ConvertThreadToFiber(NULL); } return current; } bool qemu_in_coroutine(void) { + Coroutine *current = get_current(); + return current && current->caller; }