diff --git a/target/cheri-common/cheri_tagmem.c b/target/cheri-common/cheri_tagmem.c
index b5a879ec5d6..1e24f457f0c 100644
--- a/target/cheri-common/cheri_tagmem.c
+++ b/target/cheri-common/cheri_tagmem.c
@@ -36,6 +36,7 @@
 #include "exec/exec-all.h"
 #include "exec/log.h"
 #include "exec/ramblock.h"
+#include "hw/boards.h"
 #include "cheri_defs.h"
 #include "cheri-helper-utils.h"
 // XXX: use hbitmap? Or a different data structure?
@@ -73,6 +74,19 @@
  * used.
  */
 
+static bool _need_concurrent_tags = false;
+static bool _need_concurrent_tags_initialized = false;
+static inline QEMU_ALWAYS_INLINE bool need_concurrent_tags(void)
+{
+    cheri_debug_assert(_need_concurrent_tags_initialized);
+    /*
+     * parallel_cpus may be cleared temporarily (e.g. exclusive atomic step,
+     * TODO confirm), so only require that it implies the cached value.
+     */
+    cheri_debug_assert(!parallel_cpus || _need_concurrent_tags);
+    return _need_concurrent_tags;
+}
+
 // Define to do some extra checks around spinlocks
 //#define DEBUG_SPIN_LOCKS
 
@@ -290,7 +304,7 @@ static void lock_tag_write_tag_and_release(lock_tag *lock, bool tag)
 static bool lock_tag_read(lock_tag *lock)
 {
 #ifdef CONFIG_DEBUG_TCG
-    assert(!parallel_cpus ||
+    assert(!need_concurrent_tags() ||
            (lock->as_int & (LOCKTAG_MASK_READERS | LOCKTAG_MASK_WRITE_LOCKED)));
 #endif
     return lock->as_int & LOCKTAG_MASK_TAG;
@@ -300,7 +314,7 @@ static bool lock_tag_write(lock_tag *lock, bool tag, bool check_locked)
 {
 #ifdef CONFIG_DEBUG_TCG
     if (check_locked)
-        assert(!parallel_cpus || (lock->as_int & LOCKTAG_MASK_WRITE_LOCKED));
+        assert(!need_concurrent_tags() || (lock->as_int & LOCKTAG_MASK_WRITE_LOCKED));
 #endif
     bool old = lock->as_int & LOCKTAG_MASK_TAG;
     lock->as_int = (lock->as_int & ~LOCKTAG_MASK_TAG) | tag;
@@ -309,9 +323,9 @@ static bool lock_tag_write(lock_tag *lock, bool tag, bool check_locked)
 
 typedef struct CheriTagBlock {
     // It would be silly to use locks for non-mttcg. So support both formats and
-    // use one or the other depending on qemu_tcg_mttcg_enabled()
+    // use one or the other depending on need_concurrent_tags()
     // It looks like single stepping can be turned on/off, no probably best
-    // not to use parallel_cpus.
+    // not to use parallel_cpus directly.
     union {
         DECLARE_BITMAP(tag_bitmap, CAP_TAGBLK_SIZE);
         lock_tag locked_tags[CAP_TAGBLK_SIZE];
@@ -323,7 +337,7 @@ static CheriTagBlock *cheri_tag_new_tagblk(RAMBlock *ram, uint64_t tagidx)
 {
     CheriTagBlock *tagblk, *old;
 
-    size_t size = qemu_tcg_mttcg_enabled()
+    size_t size = need_concurrent_tags()
                       ? sizeof(((CheriTagBlock *)0)->locked_tags)
                       : sizeof(((CheriTagBlock *)0)->tag_bitmap);
 
@@ -361,7 +375,7 @@ static inline QEMU_ALWAYS_INLINE CheriTagBlock *cheri_tag_block(size_t tag_index
 static inline QEMU_ALWAYS_INLINE bool tagmem_get_tag(void *tagmem, size_t index,
                                                      tag_reader_lock_t *lock)
 {
-    if (qemu_tcg_mttcg_enabled()) {
+    if (need_concurrent_tags()) {
         lock_tag *locktag = (lock_tag *)tagmem + index;
         if (lock) {
             *lock = (tag_reader_lock_t)locktag;
@@ -405,7 +419,7 @@ static inline QEMU_ALWAYS_INLINE int
 tagmem_get_tag_many(void *tagmem, size_t index, bool take_lock)
 {
     unsigned long result;
-    if (qemu_tcg_mttcg_enabled()) {
+    if (need_concurrent_tags()) {
         lock_tag *lock = ((lock_tag *)tagmem) + index;
         if (!take_lock) {
             // If we don't need the lock we can just read a whole bunch of
@@ -436,7 +450,7 @@ static inline QEMU_ALWAYS_INLINE void tagmem_set_tag(void *tagmem, size_t index,
                                                      tag_writer_lock_t *lock,
                                                      bool lock_only)
 {
-    if (qemu_tcg_mttcg_enabled()) {
+    if (need_concurrent_tags()) {
         lock_tag *lockTag = (lock_tag *)tagmem + index;
         if (lock) {
             *lock = (tag_writer_lock_t)(lockTag);
@@ -457,7 +471,7 @@ static inline QEMU_ALWAYS_INLINE void tagmem_set_tag(void *tagmem, size_t index,
 static inline QEMU_ALWAYS_INLINE void
 tagmem_set_tag_many(void *tagmem, size_t index, uint8_t tags, bool take_lock)
 {
-    if (qemu_tcg_mttcg_enabled()) {
+    if (need_concurrent_tags()) {
         lock_tag *lock = ((lock_tag *)tagmem) + index;
 
         // TODO: This is only used by morello, but when I merged this I did
@@ -515,7 +529,7 @@ static inline QEMU_ALWAYS_INLINE void tagmem_clear_tag(void *tagmem,
                                                        tag_writer_lock_t *lock,
                                                        bool lock_only)
 {
-    if (qemu_tcg_mttcg_enabled()) {
+    if (need_concurrent_tags()) {
         lock_tag *lockTag = (lock_tag *)tagmem + index;
         if (lock) {
             *lock = (tag_writer_lock_t)lockTag;
@@ -555,6 +569,17 @@ void cheri_tag_init(MemoryRegion *mr, uint64_t memory_size)
     assert(memory_region_size(mr) == memory_size &&
            "Incorrect tag mem size passed?");
 
+    if (!_need_concurrent_tags_initialized) {
+        _need_concurrent_tags =
+            qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
+        _need_concurrent_tags_initialized = true;
+    } else {
+        assert(_need_concurrent_tags ==
+               (qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1));
+    }
+    info_report("%s: need_concurrent_tags()=%d, mttcg=%d, max_cpus=%d",
+                __func__, need_concurrent_tags(), qemu_tcg_mttcg_enabled(),
+                current_machine->smp.max_cpus);
     size_t cheri_ntagblks = num_tagblocks(mr->ram_block);
     mr->ram_block->cheri_tags =
         g_malloc0(cheri_ntagblks * sizeof(CheriTagBlock *));
@@ -605,7 +630,7 @@ void *cheri_tagmem_for_addr(CPUArchState *env, target_ulong vaddr,
 
     if (tagblk != NULL) {
         const size_t tagblk_index = CAP_TAGBLK_IDX(tag);
-        if (qemu_tcg_mttcg_enabled()) {
+        if (need_concurrent_tags()) {
             return tagblk->locked_tags + tagblk_index;
         } else {
             return tagblk->tag_bitmap + BIT_WORD(tagblk_index);
@@ -834,7 +859,7 @@ void cheri_tag_phys_invalidate(CPUArchState *env, RAMBlock *ram,
             const size_t tagblk_index = CAP_TAGBLK_IDX(tag);
             if (unlikely(env && qemu_log_instr_enabled(env))) {
                 bool old_tag =
-                    qemu_tcg_mttcg_enabled()
+                    need_concurrent_tags()
                         ? tagblock_get_locktag(tagblk, tagblk_index, NULL)
                         : tagblock_get_tag(tagblk, tagblk_index);
                 if (vaddr) {
@@ -854,7 +879,7 @@ void cheri_tag_phys_invalidate(CPUArchState *env, RAMBlock *ram,
                 }
             }
 
-            if (qemu_tcg_mttcg_enabled()) {
+            if (need_concurrent_tags()) {
                 tagblock_clear_locktag(tagblk, tagblk_index, false, false);
             } else {
                 tagblock_clear_tag(tagblk, tagblk_index);