Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cherry pick LTO fixes #260

Merged
merged 6 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions include/qemu/coroutine-tls.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/*
* QEMU Thread Local Storage for coroutines
*
* Copyright Red Hat
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
* This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
* See the COPYING.LIB file in the top-level directory.
*
* It is forbidden to access Thread Local Storage in coroutines because
* compiler optimizations may cause values to be cached across coroutine
* re-entry. Coroutines can run in more than one thread through the course of
* their life, leading bugs when stale TLS values from the wrong thread are
* used as a result of compiler optimization.
*
* An example is:
*
* ..code-block:: c
* :caption: A coroutine that may see the wrong TLS value
*
* static __thread AioContext *current_aio_context;
* ...
* static void coroutine_fn foo(void)
* {
* aio_notify(current_aio_context);
* qemu_coroutine_yield();
* aio_notify(current_aio_context); // <-- may be stale after yielding!
* }
*
* This header provides macros for safely defining variables in Thread Local
* Storage:
*
* ..code-block:: c
* :caption: A coroutine that safely uses TLS
*
* QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context)
* ...
* static void coroutine_fn foo(void)
* {
* aio_notify(get_current_aio_context());
* qemu_coroutine_yield();
* aio_notify(get_current_aio_context()); // <-- safe
* }
*/

#ifndef QEMU_COROUTINE_TLS_H
#define QEMU_COROUTINE_TLS_H

/*
* To stop the compiler from caching TLS values we define accessor functions
* with __attribute__((noinline)) plus asm volatile("") to prevent
* optimizations that override noinline.
*
* The compiler can still analyze noinline code and make optimizations based on
* that knowledge, so an inline asm output operand is used to prevent
* optimizations that make assumptions about the address of the TLS variable.
*
* This is fragile and ultimately needs to be solved by a mechanism that is
* guaranteed to work by the compiler (e.g. stackless coroutines), but for now
* we use this approach to prevent issues.
*/

/**
* QEMU_DECLARE_CO_TLS:
* @type: the variable's C type
* @var: the variable name
*
* Declare an extern variable in Thread Local Storage from a header file:
*
* .. code-block:: c
* :caption: Declaring an extern variable in Thread Local Storage
*
* QEMU_DECLARE_CO_TLS(int, my_count)
* ...
* int c = get_my_count();
* set_my_count(c + 1);
* *get_ptr_my_count() = 0;
*
* This is a coroutine-safe replacement for the __thread keyword and is
* equivalent to the following code:
*
* .. code-block:: c
* :caption: Declaring a TLS variable using __thread
*
* extern __thread int my_count;
* ...
* int c = my_count;
* my_count = c + 1;
* *(&my_count) = 0;
*/
#define QEMU_DECLARE_CO_TLS(type, var) \
__attribute__((noinline)) type get_##var(void); \
__attribute__((noinline)) void set_##var(type v); \
__attribute__((noinline)) type *get_ptr_##var(void);

/**
* QEMU_DEFINE_CO_TLS:
* @type: the variable's C type
* @var: the variable name
*
* Define a variable in Thread Local Storage that was previously declared from
* a header file with QEMU_DECLARE_CO_TLS():
*
* .. code-block:: c
* :caption: Defining a variable in Thread Local Storage
*
* QEMU_DEFINE_CO_TLS(int, my_count)
*
* This is a coroutine-safe replacement for the __thread keyword and is
* equivalent to the following code:
*
* .. code-block:: c
* :caption: Defining a TLS variable using __thread
*
* __thread int my_count;
*/
#define QEMU_DEFINE_CO_TLS(type, var) \
static __thread type co_tls_##var; \
type get_##var(void) { asm volatile(""); return co_tls_##var; } \
void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \
type *get_ptr_##var(void) \
{ type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }

/**
* QEMU_DEFINE_STATIC_CO_TLS:
* @type: the variable's C type
* @var: the variable name
*
* Define a static variable in Thread Local Storage:
*
* .. code-block:: c
* :caption: Defining a static variable in Thread Local Storage
*
* QEMU_DEFINE_STATIC_CO_TLS(int, my_count)
* ...
* int c = get_my_count();
* set_my_count(c + 1);
* *get_ptr_my_count() = 0;
*
* This is a coroutine-safe replacement for the __thread keyword and is
* equivalent to the following code:
*
* .. code-block:: c
* :caption: Defining a static TLS variable using __thread
*
* static __thread int my_count;
* ...
* int c = my_count;
* my_count = c + 1;
* *(&my_count) = 0;
*/
#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \
static __thread type co_tls_##var; \
static __attribute__((noinline, unused)) \
type get_##var(void) \
{ asm volatile(""); return co_tls_##var; } \
static __attribute__((noinline, unused)) \
void set_##var(type v) \
{ asm volatile(""); co_tls_##var = v; } \
static __attribute__((noinline, unused)) \
type *get_ptr_##var(void) \
{ type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }

#endif /* QEMU_COROUTINE_TLS_H */
7 changes: 4 additions & 3 deletions include/qemu/rcu.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "qemu/queue.h"
#include "qemu/atomic.h"
#include "qemu/sys_membarrier.h"
#include "qemu/coroutine-tls.h"

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -68,11 +69,11 @@ struct rcu_reader_data {
QLIST_ENTRY(rcu_reader_data) node;
};

extern __thread struct rcu_reader_data rcu_reader;
QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader)

static inline void rcu_read_lock(void)
{
struct rcu_reader_data *p_rcu_reader = &rcu_reader;
struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
unsigned ctr;

if (p_rcu_reader->depth++ > 0) {
Expand All @@ -88,7 +89,7 @@ static inline void rcu_read_lock(void)

static inline void rcu_read_unlock(void)
{
struct rcu_reader_data *p_rcu_reader = &rcu_reader;
struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();

assert(p_rcu_reader->depth != 0);
if (--p_rcu_reader->depth > 0) {
Expand Down
9 changes: 5 additions & 4 deletions softmmu/cpus.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "monitor/monitor.h"
#include "qemu/coroutine-tls.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-commands-misc.h"
Expand Down Expand Up @@ -472,11 +473,11 @@ bool qemu_in_vcpu_thread(void)
return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;
QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked)

bool qemu_mutex_iothread_locked(void)
{
return iothread_locked;
return get_iothread_locked();
}

/*
Expand All @@ -489,13 +490,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line)

g_assert(!qemu_mutex_iothread_locked());
bql_lock(&qemu_global_mutex, file, line);
iothread_locked = true;
set_iothread_locked(true);
}

void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
set_iothread_locked(false);
qemu_mutex_unlock(&qemu_global_mutex);
}

Expand Down
10 changes: 5 additions & 5 deletions tests/unit/rcutorture.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg)

rcu_register_thread();

*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
Expand All @@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg)

rcu_register_thread();

*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
Expand Down Expand Up @@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg)

rcu_register_thread();

*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}
Expand Down Expand Up @@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg)
struct rcu_stress *cp = qatomic_read(&rcu_stress_current);

rcu_register_thread();
*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();

while (goflag == GOFLAG_INIT) {
g_usleep(1000);
Expand Down Expand Up @@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg)
{
rcu_register_thread();

*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test-rcu-list.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg)

rcu_register_thread();

*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (qatomic_read(&goflag) == GOFLAG_INIT) {
g_usleep(1000);
Expand Down Expand Up @@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg)
long long n_removed_local = 0;
struct list_element *el, *prev_el;

*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (qatomic_read(&goflag) == GOFLAG_INIT) {
g_usleep(1000);
Expand Down
38 changes: 24 additions & 14 deletions util/coroutine-ucontext.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include <ucontext.h>
#include "qemu/coroutine_int.h"
#include "qemu/coroutine-tls.h"

#ifdef CONFIG_VALGRIND_H
#include <valgrind/valgrind.h>
Expand Down Expand Up @@ -66,8 +67,8 @@ typedef struct {
/**
* Per-thread coroutine bookkeeping
*/
static __thread CoroutineUContext leader;
static __thread Coroutine *current;
QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader);

/*
* va_args to makecontext() must be type 'int', so passing
Expand Down Expand Up @@ -97,14 +98,15 @@ static inline __attribute__((always_inline))
void finish_switch_fiber(void *fake_stack_save)
{
#ifdef CONFIG_ASAN
CoroutineUContext *leaderp = get_ptr_leader();
const void *bottom_old;
size_t size_old;

__sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old);

if (!leader.stack) {
leader.stack = (void *)bottom_old;
leader.stack_size = size_old;
if (!leaderp->stack) {
leaderp->stack = (void *)bottom_old;
leaderp->stack_size = size_old;
}
#endif
#ifdef CONFIG_TSAN
Expand Down Expand Up @@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1)

/* Initialize longjmp environment and switch back the caller */
if (!sigsetjmp(self->env, 0)) {
start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack,
leader.stack_size);
CoroutineUContext *leaderp = get_ptr_leader();

start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save,
leaderp->stack, leaderp->stack_size);
start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */
siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
}
Expand Down Expand Up @@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
int ret;
void *fake_stack_save = NULL;

current = to_;
set_current(to_);

ret = sigsetjmp(from->env, 0);
if (ret == 0) {
Expand All @@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,

Coroutine *qemu_coroutine_self(void)
{
if (!current) {
current = &leader.base;
Coroutine *self = get_current();
CoroutineUContext *leaderp = get_ptr_leader();

if (!self) {
self = &leaderp->base;
set_current(self);
}
#ifdef CONFIG_TSAN
if (!leader.tsan_co_fiber) {
leader.tsan_co_fiber = __tsan_get_current_fiber();
if (!leaderp->tsan_co_fiber) {
leaderp->tsan_co_fiber = __tsan_get_current_fiber();
}
#endif
return current;
return self;
}

bool qemu_in_coroutine(void)
{
return current && current->caller;
Coroutine *self = get_current();

return self && self->caller;
}
Loading
Loading