Skip to content

Commit

Permalink
Concurrent tags: use parallel_cpus instead of qemu_tcg_mttcg_enabled()
Browse files Browse the repository at this point in the history
qemu_tcg_mttcg_enabled() will return true even when we only have a
single emulated CPU. This adds noticeable overhead to a CheriBSD boot without
`-smp` (243s instead of 215s) to boot.
Use an immutable view of `parallel_cpus` instead to avoid locking if max_cpu==1.
  • Loading branch information
arichardson committed Jul 20, 2021
1 parent 6c66a34 commit 28b843a
Showing 1 changed file with 38 additions and 13 deletions.
51 changes: 38 additions & 13 deletions target/cheri-common/cheri_tagmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "exec/exec-all.h"
#include "exec/log.h"
#include "exec/ramblock.h"
#include "hw/boards.h"
#include "cheri_defs.h"
#include "cheri-helper-utils.h"
// XXX: use hbitmap? Or a different data structure?
Expand Down Expand Up @@ -73,6 +74,19 @@
* used.
*/

static bool _need_concurrent_tags = false;
static bool _need_concurrent_tags_initialized = false;
static inline QEMU_ALWAYS_INLINE bool need_concurrent_tags(void)
{
cheri_debug_assert(_need_concurrent_tags_initialized);
/*
* TODO: can parallel_cpus change at runtime? If not we don't need the
* separate variable.
*/
cheri_debug_assert(_need_concurrent_tags == parallel_cpus);
return _need_concurrent_tags;
}

// Define to do some extra checks around spinlocks
//#define DEBUG_SPIN_LOCKS

Expand Down Expand Up @@ -290,7 +304,7 @@ static void lock_tag_write_tag_and_release(lock_tag *lock, bool tag)
static bool lock_tag_read(lock_tag *lock)
{
#ifdef CONFIG_DEBUG_TCG
assert(!parallel_cpus ||
assert(!need_concurrent_tags() ||
(lock->as_int & (LOCKTAG_MASK_READERS | LOCKTAG_MASK_WRITE_LOCKED)));
#endif
return lock->as_int & LOCKTAG_MASK_TAG;
Expand All @@ -300,7 +314,7 @@ static bool lock_tag_write(lock_tag *lock, bool tag, bool check_locked)
{
#ifdef CONFIG_DEBUG_TCG
if (check_locked)
assert(!parallel_cpus || (lock->as_int & LOCKTAG_MASK_WRITE_LOCKED));
assert(!need_concurrent_tags() || (lock->as_int & LOCKTAG_MASK_WRITE_LOCKED));
#endif
bool old = lock->as_int & LOCKTAG_MASK_TAG;
lock->as_int = (lock->as_int & ~LOCKTAG_MASK_TAG) | tag;
Expand All @@ -309,9 +323,9 @@ static bool lock_tag_write(lock_tag *lock, bool tag, bool check_locked)

typedef struct CheriTagBlock {
// It would be silly to use locks for non-mttcg. So support both formats and
// use one or the other depending on qemu_tcg_mttcg_enabled()
// use one or the other depending on need_concurrent_tags()
// It looks like single stepping can be turned on/off, no probably best
// not to use parallel_cpus.
// not to use need_concurrent_tags().
union {
DECLARE_BITMAP(tag_bitmap, CAP_TAGBLK_SIZE);
lock_tag locked_tags[CAP_TAGBLK_SIZE];
Expand All @@ -323,7 +337,7 @@ static CheriTagBlock *cheri_tag_new_tagblk(RAMBlock *ram, uint64_t tagidx)
{
CheriTagBlock *tagblk, *old;

size_t size = qemu_tcg_mttcg_enabled()
size_t size = need_concurrent_tags()
? sizeof(((CheriTagBlock *)0)->locked_tags)
: sizeof(((CheriTagBlock *)0)->tag_bitmap);

Expand Down Expand Up @@ -361,7 +375,7 @@ static inline QEMU_ALWAYS_INLINE CheriTagBlock *cheri_tag_block(size_t tag_index
static inline QEMU_ALWAYS_INLINE bool tagmem_get_tag(void *tagmem, size_t index,
tag_reader_lock_t *lock)
{
if (qemu_tcg_mttcg_enabled()) {
if (need_concurrent_tags()) {
lock_tag *locktag = (lock_tag *)tagmem + index;
if (lock) {
*lock = (tag_reader_lock_t)locktag;
Expand Down Expand Up @@ -405,7 +419,7 @@ static inline QEMU_ALWAYS_INLINE int
tagmem_get_tag_many(void *tagmem, size_t index, bool take_lock)
{
unsigned long result;
if (qemu_tcg_mttcg_enabled()) {
if (need_concurrent_tags()) {
lock_tag *lock = ((lock_tag *)tagmem) + index;
if (!take_lock) {
// If we don't need the lock we can just read a whole bunch of
Expand Down Expand Up @@ -436,7 +450,7 @@ static inline QEMU_ALWAYS_INLINE void tagmem_set_tag(void *tagmem, size_t index,
tag_writer_lock_t *lock,
bool lock_only)
{
if (qemu_tcg_mttcg_enabled()) {
if (need_concurrent_tags()) {
lock_tag *lockTag = (lock_tag *)tagmem + index;
if (lock) {
*lock = (tag_writer_lock_t)(lockTag);
Expand All @@ -457,7 +471,7 @@ static inline QEMU_ALWAYS_INLINE void tagmem_set_tag(void *tagmem, size_t index,
static inline QEMU_ALWAYS_INLINE void
tagmem_set_tag_many(void *tagmem, size_t index, uint8_t tags, bool take_lock)
{
if (qemu_tcg_mttcg_enabled()) {
if (need_concurrent_tags()) {
lock_tag *lock = ((lock_tag *)tagmem) + index;

// TODO: This is only used by morello, but when I merged this I did
Expand Down Expand Up @@ -515,7 +529,7 @@ static inline QEMU_ALWAYS_INLINE void tagmem_clear_tag(void *tagmem,
tag_writer_lock_t *lock,
bool lock_only)
{
if (qemu_tcg_mttcg_enabled()) {
if (need_concurrent_tags()) {
lock_tag *lockTag = (lock_tag *)tagmem + index;
if (lock) {
*lock = (tag_writer_lock_t)lockTag;
Expand Down Expand Up @@ -555,6 +569,17 @@ void cheri_tag_init(MemoryRegion *mr, uint64_t memory_size)
assert(memory_region_size(mr) == memory_size &&
"Incorrect tag mem size passed?");

if (!_need_concurrent_tags_initialized) {
_need_concurrent_tags =
qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
_need_concurrent_tags_initialized = true;
} else {
assert(_need_concurrent_tags ==
(qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1));
}
info_report("%s: need_concurrent_tags()=%d, mttcg=%d, max_cpus=%d",
__func__, need_concurrent_tags(), qemu_tcg_mttcg_enabled(),
current_machine->smp.max_cpus);
size_t cheri_ntagblks = num_tagblocks(mr->ram_block);
mr->ram_block->cheri_tags =
g_malloc0(cheri_ntagblks * sizeof(CheriTagBlock *));
Expand Down Expand Up @@ -605,7 +630,7 @@ void *cheri_tagmem_for_addr(CPUArchState *env, target_ulong vaddr,

if (tagblk != NULL) {
const size_t tagblk_index = CAP_TAGBLK_IDX(tag);
if (qemu_tcg_mttcg_enabled()) {
if (need_concurrent_tags()) {
return tagblk->locked_tags + tagblk_index;
} else {
return tagblk->tag_bitmap + BIT_WORD(tagblk_index);
Expand Down Expand Up @@ -834,7 +859,7 @@ void cheri_tag_phys_invalidate(CPUArchState *env, RAMBlock *ram,
const size_t tagblk_index = CAP_TAGBLK_IDX(tag);
if (unlikely(env && qemu_log_instr_enabled(env))) {
bool old_tag =
qemu_tcg_mttcg_enabled()
need_concurrent_tags()
? tagblock_get_locktag(tagblk, tagblk_index, NULL)
: tagblock_get_tag(tagblk, tagblk_index);
if (vaddr) {
Expand All @@ -854,7 +879,7 @@ void cheri_tag_phys_invalidate(CPUArchState *env, RAMBlock *ram,
}
}

if (qemu_tcg_mttcg_enabled()) {
if (need_concurrent_tags()) {
tagblock_clear_locktag(tagblk, tagblk_index, false, false);
} else {
tagblock_clear_tag(tagblk, tagblk_index);
Expand Down

0 comments on commit 28b843a

Please sign in to comment.